use nom::{ Err, branch::alt, bytes::complete::{take_till, tag}, character::complete::{alpha1, alphanumeric0, not_line_ending,none_of, char, one_of, space0, space1, multispace0, line_ending}, combinator::{opt, peek, not, value, map, recognize}, error::{context, VerboseError, ParseError}, multi::{fold_many1, many1, many0, separated_list1, separated_list0}, sequence::{pair, tuple, preceded}, IResult, Parser, }; use nom_locate::{position, LocatedSpan}; use std::rc::Rc; type Span<'a> = LocatedSpan<&'a str>; type ParseResult<'a, O> = IResult, O, VerboseError>>; use crate::ast::*; fn rc_string(s: &str) -> Rc { Rc::new(s.to_string()) } fn tok<'a, O>(input_parser: impl Parser, O, VerboseError>>) -> impl FnMut(Span<'a>) -> IResult, O, VerboseError>> { context("tok", map(tuple((ws0, input_parser)), |(_, output)| output)) } fn kw<'a>(keyword_str: &'static str) -> impl FnMut(Span<'a>) -> ParseResult<()> { context("keyword", tok(value((), tag(keyword_str)))) } // whitespace does consume at least one piece of whitespace - use ws0 for maybe none fn whitespace(input: Span) -> ParseResult<()> { context("whitespace", alt(( block_comment, line_comment, value((), space1), )))(input) } fn ws0(input: Span) -> ParseResult<()> { context("WS0", value((), many0(whitespace)))(input) } fn line_comment(input: Span) -> ParseResult<()> { value((), tuple((tag("//"), not_line_ending)), )(input) } fn block_comment(input: Span) -> ParseResult<()> { context("Block-comment", value((), tuple(( tag("/*"), many0(alt(( value((), none_of("*/")), value((), none_of("/*")), block_comment, ))), tag("*/") ))))(input) } fn statement_delimiter(input: Span) -> ParseResult<()> { tok(alt(( value((), line_ending), value((), char(';')) )) )(input) } fn block(input: Span) -> ParseResult { context("block", map( tuple(( tok(char('{')), many0(statement_delimiter), separated_list0(statement_delimiter, statement), many0(statement_delimiter), tok(char('}')), )), |(_, _, items, _, _)| items.into()))(input) } fn statement(input: Span) -> ParseResult { context("Parsing-statement", map(expression, |expr| Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(expr), }))(input) } fn expression(input: Span) -> ParseResult { map(pair(expression_kind, opt(type_anno)), |(kind, maybe_anno)| { Expression::new(Default::default(), kind) })(input) } fn type_anno(input: Span) -> ParseResult { preceded(kw(":"), type_identifier)(input) } fn type_identifier(input: Span) -> ParseResult { /* alt(( tuple((kw("("), separated_list0(kw(","), type_identifier), kw(")"))), type_singleton_name ))(input) */ unimplemented!() } fn type_singleton_name(input: Span) -> ParseResult { unimplemented!() } pub fn expression_kind(input: Span) -> ParseResult { context("expression-kind", primary_expr)(input) } fn primary_expr(input: Span) -> ParseResult { context("primary-expr", alt(( number_literal, bool_literal, identifier_expr, )))(input) } fn identifier_expr(input: Span) -> ParseResult { context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input) } fn qualified_identifier(input: Span) -> ParseResult { tok( map( separated_list1(tag("::"), map(identifier, |x| rc_string(x.fragment()))), |items| QualifiedName { id: Default::default(), components: items } ))(input) } fn identifier(input: Span) -> ParseResult { recognize( tuple(( alt((tag("_"), alpha1)), alphanumeric0, )))(input) } fn bool_literal(input: Span) -> ParseResult { context("bool-literal", alt(( map(kw("true"), |_| ExpressionKind::BoolLiteral(true)), map(kw("false"), |_| ExpressionKind::BoolLiteral(false)), )))(input) } fn number_literal(input: Span) -> ParseResult { map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input) } fn dec_literal(input: Span) -> ParseResult { map(digits(digit_group_dec), |chars: Vec| { let s: String = chars.into_iter().collect(); s.parse().unwrap() })(input) } fn hex_literal(input: Span) -> ParseResult { map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec| { let s: String = chars.into_iter().collect(); parse_hex(&s).unwrap() })(input) } fn bin_literal(input: Span) -> ParseResult { map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec| { let s: String = chars.into_iter().collect(); parse_binary(&s).unwrap() })(input) } fn digits<'a, E: ParseError>>( digit_type: impl Parser, Vec, E>, ) -> impl FnMut(Span<'a>) -> IResult, Vec, E> { map(separated_list1(many1(char('_')), digit_type), |items: Vec>| { items.into_iter().flatten().collect() }) } fn digit_group_dec(input: Span) -> ParseResult> { many1(one_of("0123456789"))(input) } fn digit_group_hex(input: Span) -> ParseResult> { many1(one_of("0123456789abcdefABCDEF"))(input) } fn digit_group_bin(input: Span) -> ParseResult> { many1(one_of("01"))(input) } fn parse_binary(digits: &str) -> Result { let mut result: u64 = 0; let mut multiplier = 1; for d in digits.chars().rev() { match d { '1' => result += multiplier, '0' => (), '_' => continue, _ => unreachable!(), } multiplier = match multiplier.checked_mul(2) { Some(m) => m, None => return Err("Binary expression will overflow"), } } Ok(result) } fn parse_hex(digits: &str) -> Result { let mut result: u64 = 0; let mut multiplier: u64 = 1; for d in digits.chars().rev() { if d == '_' { continue; } match d.to_digit(16) { Some(n) => result += n as u64 * multiplier, None => return Err("Internal parser error: invalid hex digit"), } multiplier = match multiplier.checked_mul(16) { Some(m) => m, None => return Err("Hexadecimal expression will overflow"), } } Ok(result) } #[cfg(test)] mod test { use pretty_assertions::assert_eq; use super::*; fn rc(s: &str) -> Rc { Rc::new(s.to_owned()) } macro_rules! qn { ( $( $component:ident),* ) => { { let mut components = vec![]; $( components.push(rc(stringify!($component))); )* QualifiedName { components, id: Default::default() } } }; } macro_rules! span { ($func:expr, $input:expr) => { $func(Span::new($input)).map(|(span, x)| (*span.fragment(), x)) }; } #[test] fn combinator_test1() { assert_eq!(span!(digits(digit_group_dec), "342"), Ok(("", vec!['3', '4', '2']))); assert_eq!(span!(bin_literal, "0b1111qsdf"), Ok(("qsdf", 15))); } #[test] fn combinator_test_ws0() { assert_eq!(span!(block_comment, "/*yolo*/"), Ok(("", ()))); assert_eq!(span!(block_comment, "/*yolo*/ jumpy /*nah*/"), Ok((" jumpy /*nah*/", ()))); assert_eq!(span!(ws0, "/* yolo */ "), Ok(("", ()))); assert_eq!(span!(ws0, "/* /* no */ yolo */ "), Ok(("", ()))); } #[test] fn combinator_test2() { for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() { assert_eq!(span!(expression_kind, s).unwrap().1, ExpressionKind::NatLiteral(15)); } assert_eq!(span!(expression_kind, " /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true)); assert_eq!(span!(expression_kind, " /*yolo*/ barnaby").unwrap().1, ExpressionKind::Value(qn!(barnaby))); } #[test] fn combinator_test3() { let source = "{}"; assert_eq!(span!(block, source).unwrap().1, vec![].into()); let source = r#"{ //hella 4_5 //bog 11; /*chutney*/0xf }"#; let parsed = span!(block, source).map_err(|err| match err { Err::Error(err) | Err::Failure(err) => { let err = VerboseError { errors: err.errors.into_iter().map(|(sp, kind)| (*sp.fragment(), kind)).collect() }; nom::error::convert_error(source, err) }, _ => panic!() }); if let Err(err) = parsed { println!("{}", err); panic!("parse error desu!"); } assert_eq!(parsed.unwrap().1, vec![ Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(), ExpressionKind::NatLiteral(45))) }, Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(), ExpressionKind::NatLiteral(11))) }, Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(), ExpressionKind::NatLiteral(15))) }, ].into()); } }