extern crate nom; use std::rc::Rc; use std::str::FromStr; use nom::IResult; use nom::character::complete::{one_of, space0, alphanumeric0}; use nom::bytes::complete::{tag, take, take_while, take_while1, take_until}; use nom::combinator::{cut, cond, map, map_res, value, opt, verify}; use nom::multi::{separated_list, separated_nonempty_list, many1, many0}; use nom::error::{context, ParseError, VerboseError}; use nom::branch::alt; use nom::sequence::{pair, tuple, delimited, preceded}; use crate::ast::*; use crate::builtin::Builtin; type ParseResult<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>; pub fn ws, F>(parser: F) -> impl Fn(I) -> IResult where I: nom::InputTakeAtPosition, ::Item: nom::AsChar + Clone, F: Fn(I) -> IResult, { delimited(space0, parser, space0) } fn statement_sep(text: &str) -> ParseResult<()> { value((), one_of("\n;"))(text) } fn single_alphabetic_character(text: &str) -> ParseResult { let p = verify(take(1usize), |s: &str| s.chars().nth(0).map(|c| c.is_alphabetic()).unwrap_or(false)); map(p, |s: &str| s.chars().nth(0).unwrap())(text) } fn single_alphanumeric_character(text: &str) -> ParseResult { let p = verify(take(1usize), |s: &str| s.chars().nth(0).map(|c| c.is_alphanumeric() || c == '_').unwrap_or(false)); map(p, |s: &str| s.chars().nth(0).unwrap())(text) } fn identifier(text: &str) -> ParseResult> { use nom::character::complete::char; map(alt(( pair(char('_'), many1(single_alphanumeric_character)), pair(single_alphabetic_character, many0(single_alphanumeric_character)) )), |(first, rest): (char, Vec)| Rc::new(format!("{}{}", first, rest.into_iter().collect::())) )(text) } const OPERATOR_CHARS: &'static str = "~`!@#$%^&*-+=<>?/|"; fn operator(text: &str) -> ParseResult> { many1(one_of(OPERATOR_CHARS))(text) } fn binop(text: &str) -> ParseResult { context("Binop", map( operator, |op| BinOp::from_sigil(&op.into_iter().collect::()) ))(text) } fn bool_literal(text: &str) -> ParseResult { let p = alt(( value(true, tag("true")), value(false, tag("false")) )); map(p, ExpressionKind::BoolLiteral)(text) } fn number_literal(text: &str) -> ParseResult { let num_lit = many1(alt(( map(one_of("1234567890"), |s: char| Some(s)), value(None, nom::character::complete::char('_')), ))); let (text, n) = map_res(num_lit, |digits: Vec>| { let num_str: String = digits.into_iter().filter_map(|x| x).collect(); u64::from_str_radix(&num_str, 10) })(text)?; Ok((text, ExpressionKind::NatLiteral(n))) } fn binary_literal(text: &str) -> ParseResult { let p = preceded(tag("0b"), cut(take_while1(|c: char| c == '0' || c == '1'))); let (rest, n): (&str, u64) = map_res( p, |hex_str: &str| u64::from_str_radix(hex_str, 2) )(text)?; let expr = ExpressionKind::NatLiteral(n); Ok((rest, expr)) } fn hex_literal(text: &str) -> ParseResult { let p = preceded(tag("0x"), cut(take_while1(|c: char| c.is_digit(16)))); let (rest, n): (&str, u64) = map_res( p, |hex_str: &str| u64::from_str_radix(hex_str, 16) )(text)?; let expr = ExpressionKind::NatLiteral(n); Ok((rest, expr)) } fn string_literal(text: &str) -> ParseResult { use nom::character::complete::char; let (text, string_output) = delimited( char('"'), take_until("\""), char('"') )(text)?; let expr = ExpressionKind::StringLiteral(Rc::new(string_output.to_string())); Ok((text, expr)) } fn literal(text: &str) -> ParseResult { alt(( string_literal, hex_literal, binary_literal, number_literal, bool_literal, ))(text) } fn paren_expr(text: &str) -> ParseResult { use nom::character::complete::char; context("Paren expression", delimited(char('('), expression_kind, char(')')))(text) } fn prefix_op(text: &str) -> ParseResult { use nom::character::complete::char; let p = alt((char('+'), char('-'), char('!'))); map(p, |sigil| PrefixOp::from_str(&sigil.to_string()).unwrap())(text) } fn qualified_name(text: &str) -> ParseResult { map( separated_nonempty_list(tag("::"), identifier), |components| QualifiedName { id: ItemId::new(0), components } )(text) } fn identifier_expr(text: &str) -> ParseResult { map(qualified_name, ExpressionKind::Value)(text) } fn primary_expr(text: &str) -> ParseResult { // primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr alt(( if_expr, for_expr, literal, paren_expr, identifier_expr, ))(text) } fn for_expr(text: &str) -> ParseResult { //TODO do I need something like no struct literal here? let en = alt(( map(enumerator, |e| vec![e]), delimited(tag("{"), enumerators, tag("}")) )); preceded(tag("for"), map(tuple((en, for_expr_body)), |(enumerators, body)| ExpressionKind::ForExpression { enumerators, body: Box::new(body) } ))(text) } fn enumerators(text: &str) -> ParseResult> { separated_nonempty_list(alt((value((), tag(",")), statement_sep)), enumerator)(text) } fn enumerator(text: &str) -> ParseResult { map( tuple((identifier, tag("<-"), expression)), |(id, _, generator)| Enumerator { id, generator } )(text) } fn for_expr_body(text: &str) -> ParseResult { unimplemented!() } fn invocation_argument(text: &str) -> ParseResult { use nom::character::complete::char; alt(( value(InvocationArgument::Ignored, pair(char('_'), alphanumeric0)), map(expression_kind, |kind: ExpressionKind| InvocationArgument::Positional( Expression { id: ItemId::new(0), kind, type_anno: None })) //map(identifier, |id: Rc| ))(text) } fn if_expr(text: &str) -> ParseResult { let p = preceded(tag("if"), pair(ws(discriminator), ws(if_expr_body))); map(p, |(discriminator, body)| { let discriminator = discriminator.map(Box::new); let body = Box::new(body); ExpressionKind::IfExpression { discriminator, body } }) (text) } fn discriminator(text: &str) -> ParseResult> { use nom::combinator::verify; cond(text.chars().next().map(|c| c != '{').unwrap_or(true), expression )(text) } fn if_expr_body(text: &str) -> ParseResult { alt(( preceded(tag("then"), simple_conditional), preceded(tag("is"), simple_pattern_match), cond_block, ))(text) } fn simple_conditional(text: &str) -> ParseResult { map( pair(expr_or_block, else_case), |(then_case, else_case)| IfExpressionBody::SimpleConditional { then_case, else_case } )(text) } fn else_case(text: &str) -> ParseResult> { opt(preceded(tag("else"), expr_or_block))(text) } fn simple_pattern_match(text: &str) -> ParseResult { let p = tuple((pattern, tag("then"), expr_or_block, else_case)); map(p, |(pattern, _, then_case, else_case)| IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case } )(text) } fn pattern(text: &str) -> ParseResult { use nom::character::complete::char; let t = delimited(char('('), separated_nonempty_list(char(','), pattern), char(')') ); alt(( map(t, |patterns| Pattern::TuplePattern(patterns)), simple_pattern, ))(text) } fn simple_pattern(text: &str) -> ParseResult { alt(( value(Pattern::Ignored, tag("_")), tuple_struct_pattern, record_pattern, map(pattern_literal, Pattern::Literal), map(qualified_name, Pattern::VarOrName), ))(text) } fn tuple_struct_pattern(text: &str) -> ParseResult { unimplemented!() } fn record_pattern(text: &str) -> ParseResult { unimplemented!() } fn pattern_literal(text: &str) -> ParseResult { use PatternLiteral::*; use nom::character::complete::char; alt(( value(BoolPattern(true), tag("true")), value(BoolPattern(false), tag("false")), map(delimited(char('"'), take_until("\""), char('"')), |s: &str| StringPattern(Rc::new(s.to_string()))), ))(text) //TODO handle signed_number_literal } fn cond_block(text: &str) -> ParseResult { use nom::character::complete::char; //TODO maybe change this bit of syntax let comma_or_delimitor = alt((value((), char(',')), statement_sep)); let p = delimited(char('{'), separated_nonempty_list(comma_or_delimitor, cond_arm), char('}')); map(p, IfExpressionBody::CondList)(text) } fn cond_arm(text: &str) -> ParseResult { let variant_1 = map( tuple((condition, guard, tag("then"), expr_or_block)), |(condition, guard, _, body)| ConditionArm { condition, guard, body } ); let variant_2 = map( preceded(tag("else"), expr_or_block), |body| ConditionArm { condition: Condition::Else, guard: None, body } ); alt((variant_1, variant_2))(text) } fn condition(text: &str) -> ParseResult { alt(( map(preceded(tag("is"), pattern), Condition::Pattern), map(tuple((binop, expression)), |(op, expr)| Condition::TruncatedOp(op, expr)), map(expression, Condition::Expression), ))(text) } fn guard(text: &str) -> ParseResult> { opt(preceded(tag("if"), expression))(text) } fn expr_or_block(text: &str) -> ParseResult { //TODO fix alt((block, map(expression, |expr| vec![Statement { id: ItemId::new(0), kind: StatementKind::Expression(expr)}])))(text) } fn block(text: &str) -> ParseResult { use nom::character::complete::char; //TODO fix this so it can handle nested statements delimited(char('{'), separated_nonempty_list(statement_sep, map(expression, |e| Statement { id: ItemId::new(0), kind: StatementKind::Expression(e) }) ), char('}'))(text) } fn call_expr(text: &str) -> ParseResult { use nom::character::complete::char; let parse_call = opt( delimited(char('('), separated_list(char(','), invocation_argument), char(')')) ); let p = pair(primary_expr, parse_call); map(p, |(expr, call_part)| if let Some(arguments) = call_part { let f = bx!(Expression { id: ItemId::new(0), kind: expr, type_anno: None }); ExpressionKind::Call { f, arguments } } else { expr })(text) } fn prefix_expr(text: &str) -> ParseResult { let (text, pfx) = ws(opt(prefix_op))(text)?; let (text, result) = call_expr(text)?; match pfx { None => Ok((text, result)), Some(pfx) => { let exp = Expression { id: ItemId::new(0), kind: result, type_anno: None }; Ok((text, ExpressionKind::PrefixExp(pfx, Box::new(exp)))) } } } // this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ fn precedence_expr(text: &str) -> ParseResult { fn inner_precedence_expr(input: &str, precedence: i32) -> ParseResult { let (mut outer_rest, mut lhs) = prefix_expr(input)?; loop { let (rest, _) = space0(outer_rest)?; let (rest, maybe_binop) = opt(binop)(rest)?; let (new_precedence, binop) = match maybe_binop { Some(binop) => (binop.precedence(), binop), None => break, }; if precedence >= new_precedence { break; } let (rest, _) = space0(rest)?; let (rest, rhs) = inner_precedence_expr(rest, new_precedence)?; outer_rest = rest; lhs = ExpressionKind::BinExp(binop, bx!(Expression::new(ItemId::new(0), lhs)), bx!(Expression::new(ItemId::new(0), rhs)) ); } Ok((outer_rest, lhs)) } context("Precedence expression", |input| inner_precedence_expr(input, BinOp::min_precedence()) )(text) } fn expression_kind(text: &str) -> ParseResult { context("Expression kind", precedence_expr)(text) } fn type_anno(text: &str) -> ParseResult { use nom::character::complete::char; preceded(ws(char(':')), ws(type_name))(text) } fn type_name(text: &str) -> ParseResult { //TODO incomplete let (text, name) = identifier(text)?; let id = TypeIdentifier::Singleton(TypeSingletonName { name, params: vec![] }); Ok((text, id)) } fn expression(text: &str) -> ParseResult { let (rest, (kind, type_anno)) = pair(expression_kind, opt(type_anno))(text)?; let expr = Expression { id: ItemId::new(0), kind, type_anno }; Ok((rest, expr)) } pub fn perform_parsing(input: &str) -> Result { let output = match expression(input) { Ok((rest, ast)) => format!("{:?} (rest: {})", ast, rest), Err(nom::Err::Incomplete(needed)) => format!("Incomplete: {:?}" ,needed), Err(nom::Err::Error(verbose_error) | nom::Err::Failure(verbose_error)) => { format!("Verbose Error: ` {:?} `", verbose_error) //nom::error::convert_error(input, verbose_error) } }; Ok(output) }