422 lines
13 KiB
Rust
422 lines
13 KiB
Rust
extern crate nom;
|
|
|
|
use std::rc::Rc;
|
|
use std::str::FromStr;
|
|
|
|
use nom::IResult;
|
|
use nom::character::complete::{one_of, space0, alphanumeric0};
|
|
use nom::bytes::complete::{tag, take, take_while, take_while1, take_until};
|
|
use nom::combinator::{cut, cond, map, map_res, value, opt, verify};
|
|
use nom::multi::{separated_list, separated_nonempty_list, many1, many0};
|
|
use nom::error::{context, ParseError, VerboseError};
|
|
use nom::branch::alt;
|
|
use nom::sequence::{pair, tuple, delimited, preceded};
|
|
|
|
use crate::ast::*;
|
|
use crate::builtin::Builtin;
|
|
|
|
type ParseResult<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;
|
|
|
|
pub fn ws<I, O, E: ParseError<I>, F>(parser: F) -> impl Fn(I) -> IResult<I, O, E>
|
|
where
|
|
I: nom::InputTakeAtPosition,
|
|
<I as nom::InputTakeAtPosition>::Item: nom::AsChar + Clone,
|
|
F: Fn(I) -> IResult<I, O, E>,
|
|
{
|
|
delimited(space0, parser, space0)
|
|
}
|
|
|
|
fn statement_sep(text: &str) -> ParseResult<()> {
|
|
value((), one_of("\n;"))(text)
|
|
}
|
|
|
|
fn single_alphabetic_character(text: &str) -> ParseResult<char> {
|
|
let p = verify(take(1usize), |s: &str| s.chars().nth(0).map(|c| c.is_alphabetic()).unwrap_or(false));
|
|
map(p, |s: &str| s.chars().nth(0).unwrap())(text)
|
|
}
|
|
|
|
fn single_alphanumeric_character(text: &str) -> ParseResult<char> {
|
|
let p = verify(take(1usize), |s: &str| s.chars().nth(0).map(|c| c.is_alphanumeric() || c == '_').unwrap_or(false));
|
|
map(p, |s: &str| s.chars().nth(0).unwrap())(text)
|
|
}
|
|
|
|
fn identifier(text: &str) -> ParseResult<Rc<String>> {
|
|
use nom::character::complete::char;
|
|
map(alt((
|
|
pair(char('_'), many1(single_alphanumeric_character)),
|
|
pair(single_alphabetic_character, many0(single_alphanumeric_character))
|
|
)),
|
|
|(first, rest): (char, Vec<char>)| Rc::new(format!("{}{}", first, rest.into_iter().collect::<String>()))
|
|
)(text)
|
|
}
|
|
|
|
const OPERATOR_CHARS: &'static str = "~`!@#$%^&*-+=<>?/|";
|
|
fn operator(text: &str) -> ParseResult<Vec<char>> {
|
|
many1(one_of(OPERATOR_CHARS))(text)
|
|
}
|
|
|
|
fn binop(text: &str) -> ParseResult<BinOp> {
|
|
context("Binop", map(
|
|
operator,
|
|
|op| BinOp::from_sigil(&op.into_iter().collect::<String>())
|
|
))(text)
|
|
}
|
|
|
|
fn bool_literal(text: &str) -> ParseResult<ExpressionKind> {
|
|
let p = alt((
|
|
value(true, tag("true")),
|
|
value(false, tag("false"))
|
|
));
|
|
map(p, ExpressionKind::BoolLiteral)(text)
|
|
}
|
|
|
|
fn number_literal(text: &str) -> ParseResult<ExpressionKind> {
|
|
let num_lit = many1(alt((
|
|
map(one_of("1234567890"), |s: char| Some(s)),
|
|
value(None, nom::character::complete::char('_')),
|
|
)));
|
|
|
|
let (text, n) = map_res(num_lit,
|
|
|digits: Vec<Option<char>>| {
|
|
let num_str: String = digits.into_iter().filter_map(|x| x).collect();
|
|
u64::from_str_radix(&num_str, 10)
|
|
})(text)?;
|
|
|
|
Ok((text, ExpressionKind::NatLiteral(n)))
|
|
}
|
|
|
|
fn binary_literal(text: &str) -> ParseResult<ExpressionKind> {
|
|
let p = preceded(tag("0b"), cut(take_while1(|c: char| c == '0' || c == '1')));
|
|
let (rest, n): (&str, u64) = map_res(
|
|
p, |hex_str: &str| u64::from_str_radix(hex_str, 2)
|
|
)(text)?;
|
|
let expr = ExpressionKind::NatLiteral(n);
|
|
Ok((rest, expr))
|
|
}
|
|
|
|
fn hex_literal(text: &str) -> ParseResult<ExpressionKind> {
|
|
let p = preceded(tag("0x"), cut(take_while1(|c: char| c.is_digit(16))));
|
|
let (rest, n): (&str, u64) = map_res(
|
|
p, |hex_str: &str| u64::from_str_radix(hex_str, 16)
|
|
)(text)?;
|
|
let expr = ExpressionKind::NatLiteral(n);
|
|
Ok((rest, expr))
|
|
}
|
|
|
|
fn string_literal(text: &str) -> ParseResult<ExpressionKind> {
|
|
use nom::character::complete::char;
|
|
let (text, string_output) = delimited(
|
|
char('"'), take_until("\""), char('"')
|
|
)(text)?;
|
|
let expr = ExpressionKind::StringLiteral(Rc::new(string_output.to_string()));
|
|
Ok((text, expr))
|
|
}
|
|
|
|
fn literal(text: &str) -> ParseResult<ExpressionKind> {
|
|
alt((
|
|
string_literal,
|
|
hex_literal,
|
|
binary_literal,
|
|
number_literal,
|
|
bool_literal,
|
|
))(text)
|
|
}
|
|
|
|
fn paren_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
use nom::character::complete::char;
|
|
context("Paren expression", delimited(char('('), expression_kind, char(')')))(text)
|
|
}
|
|
|
|
fn prefix_op(text: &str) -> ParseResult<PrefixOp> {
|
|
use nom::character::complete::char;
|
|
let p = alt((char('+'), char('-'), char('!')));
|
|
map(p, |sigil| PrefixOp::from_str(&sigil.to_string()).unwrap())(text)
|
|
}
|
|
|
|
fn qualified_name(text: &str) -> ParseResult<QualifiedName> {
|
|
map(
|
|
separated_nonempty_list(tag("::"), identifier),
|
|
|components| QualifiedName { id: ItemId::new(0), components }
|
|
)(text)
|
|
}
|
|
|
|
fn identifier_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
map(qualified_name, ExpressionKind::Value)(text)
|
|
}
|
|
|
|
fn primary_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
// primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
|
|
|
|
alt((
|
|
if_expr,
|
|
for_expr,
|
|
literal,
|
|
paren_expr,
|
|
identifier_expr,
|
|
))(text)
|
|
}
|
|
|
|
fn for_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
//TODO do I need something like no struct literal here?
|
|
let en = alt((
|
|
map(enumerator, |e| vec![e]),
|
|
delimited(tag("{"), enumerators, tag("}"))
|
|
));
|
|
preceded(tag("for"),
|
|
map(tuple((en, for_expr_body)),
|
|
|(enumerators, body)| ExpressionKind::ForExpression { enumerators, body: Box::new(body) }
|
|
))(text)
|
|
}
|
|
|
|
fn enumerators(text: &str) -> ParseResult<Vec<Enumerator>> {
|
|
separated_nonempty_list(alt((value((), tag(",")), statement_sep)),
|
|
enumerator)(text)
|
|
}
|
|
|
|
fn enumerator(text: &str) -> ParseResult<Enumerator> {
|
|
map(
|
|
tuple((identifier, tag("<-"), expression)),
|
|
|(id, _, generator)| Enumerator { id, generator }
|
|
)(text)
|
|
}
|
|
|
|
/// Parser for the body of a `for` expression.
///
/// Not implemented yet: calling this always panics via `unimplemented!()`.
fn for_expr_body(_text: &str) -> ParseResult<ForBody> {
    unimplemented!()
}
|
|
|
|
fn invocation_argument(text: &str) -> ParseResult<InvocationArgument> {
|
|
use nom::character::complete::char;
|
|
alt((
|
|
value(InvocationArgument::Ignored, pair(char('_'), alphanumeric0)),
|
|
map(expression_kind, |kind: ExpressionKind| InvocationArgument::Positional(
|
|
Expression { id: ItemId::new(0), kind, type_anno: None }))
|
|
//map(identifier, |id: Rc<String>|
|
|
))(text)
|
|
}
|
|
|
|
fn if_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
let p = preceded(tag("if"), pair(ws(discriminator), ws(if_expr_body)));
|
|
map(p, |(discriminator, body)| {
|
|
let discriminator = discriminator.map(Box::new);
|
|
let body = Box::new(body);
|
|
ExpressionKind::IfExpression { discriminator, body }
|
|
}) (text)
|
|
}
|
|
|
|
fn discriminator(text: &str) -> ParseResult<Option<Expression>> {
|
|
use nom::combinator::verify;
|
|
cond(text.chars().next().map(|c| c != '{').unwrap_or(true),
|
|
expression
|
|
)(text)
|
|
}
|
|
|
|
fn if_expr_body(text: &str) -> ParseResult<IfExpressionBody> {
|
|
alt((
|
|
preceded(tag("then"), simple_conditional),
|
|
preceded(tag("is"), simple_pattern_match),
|
|
cond_block,
|
|
))(text)
|
|
}
|
|
|
|
fn simple_conditional(text: &str) -> ParseResult<IfExpressionBody> {
|
|
map(
|
|
pair(expr_or_block, else_case),
|
|
|(then_case, else_case)| IfExpressionBody::SimpleConditional { then_case, else_case }
|
|
)(text)
|
|
}
|
|
|
|
fn else_case(text: &str) -> ParseResult<Option<Block>> {
|
|
opt(preceded(tag("else"), expr_or_block))(text)
|
|
}
|
|
|
|
fn simple_pattern_match(text: &str) -> ParseResult<IfExpressionBody> {
|
|
let p = tuple((pattern, tag("then"), expr_or_block, else_case));
|
|
map(p, |(pattern, _, then_case, else_case)|
|
|
IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }
|
|
)(text)
|
|
}
|
|
|
|
fn pattern(text: &str) -> ParseResult<Pattern> {
|
|
use nom::character::complete::char;
|
|
|
|
let t = delimited(char('('),
|
|
separated_nonempty_list(char(','), pattern),
|
|
char(')')
|
|
);
|
|
|
|
alt((
|
|
map(t, |patterns| Pattern::TuplePattern(patterns)),
|
|
simple_pattern,
|
|
))(text)
|
|
}
|
|
|
|
fn simple_pattern(text: &str) -> ParseResult<Pattern> {
|
|
alt((
|
|
value(Pattern::Ignored, tag("_")),
|
|
tuple_struct_pattern,
|
|
record_pattern,
|
|
map(pattern_literal, Pattern::Literal),
|
|
map(qualified_name, Pattern::VarOrName),
|
|
))(text)
|
|
}
|
|
|
|
/// Parser for tuple-struct patterns.
///
/// Not implemented yet: calling this always panics via `unimplemented!()`.
fn tuple_struct_pattern(_text: &str) -> ParseResult<Pattern> {
    unimplemented!()
}
|
|
|
|
/// Parser for record patterns.
///
/// Not implemented yet: calling this always panics via `unimplemented!()`.
fn record_pattern(_text: &str) -> ParseResult<Pattern> {
    unimplemented!()
}
|
|
|
|
fn pattern_literal(text: &str) -> ParseResult<PatternLiteral> {
|
|
use PatternLiteral::*;
|
|
use nom::character::complete::char;
|
|
alt((
|
|
value(BoolPattern(true), tag("true")),
|
|
value(BoolPattern(false), tag("false")),
|
|
map(delimited(char('"'), take_until("\""), char('"')), |s: &str| StringPattern(Rc::new(s.to_string()))),
|
|
))(text)
|
|
//TODO handle signed_number_literal
|
|
}
|
|
|
|
fn cond_block(text: &str) -> ParseResult<IfExpressionBody> {
|
|
use nom::character::complete::char;
|
|
//TODO maybe change this bit of syntax
|
|
let comma_or_delimitor = alt((value((), char(',')), statement_sep));
|
|
let p = delimited(char('{'),
|
|
separated_nonempty_list(comma_or_delimitor, cond_arm),
|
|
char('}'));
|
|
map(p, IfExpressionBody::CondList)(text)
|
|
}
|
|
|
|
fn cond_arm(text: &str) -> ParseResult<ConditionArm> {
|
|
let variant_1 = map(
|
|
tuple((condition, guard, tag("then"), expr_or_block)),
|
|
|(condition, guard, _, body)| ConditionArm { condition, guard, body }
|
|
);
|
|
let variant_2 = map(
|
|
preceded(tag("else"), expr_or_block),
|
|
|body| ConditionArm { condition: Condition::Else, guard: None, body }
|
|
);
|
|
alt((variant_1, variant_2))(text)
|
|
}
|
|
|
|
fn condition(text: &str) -> ParseResult<Condition> {
|
|
alt((
|
|
map(preceded(tag("is"), pattern), Condition::Pattern),
|
|
map(tuple((binop, expression)), |(op, expr)|
|
|
Condition::TruncatedOp(op, expr)),
|
|
map(expression, Condition::Expression),
|
|
))(text)
|
|
}
|
|
|
|
fn guard(text: &str) -> ParseResult<Option<Expression>> {
|
|
opt(preceded(tag("if"), expression))(text)
|
|
}
|
|
|
|
fn expr_or_block(text: &str) -> ParseResult<Block> {
|
|
//TODO fix
|
|
alt((block, map(expression, |expr| vec![Statement { id: ItemId::new(0), kind: StatementKind::Expression(expr)}])))(text)
|
|
}
|
|
|
|
fn block(text: &str) -> ParseResult<Block> {
|
|
use nom::character::complete::char;
|
|
//TODO fix this so it can handle nested statements
|
|
delimited(char('{'),
|
|
separated_nonempty_list(statement_sep,
|
|
map(expression, |e| Statement { id: ItemId::new(0), kind: StatementKind::Expression(e) })
|
|
),
|
|
char('}'))(text)
|
|
}
|
|
|
|
fn call_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
use nom::character::complete::char;
|
|
let parse_call = opt(
|
|
delimited(char('('), separated_list(char(','), invocation_argument), char(')'))
|
|
);
|
|
let p = pair(primary_expr, parse_call);
|
|
map(p, |(expr, call_part)| if let Some(arguments) = call_part {
|
|
let f = bx!(Expression { id: ItemId::new(0), kind: expr, type_anno: None });
|
|
ExpressionKind::Call { f, arguments }
|
|
} else {
|
|
expr
|
|
})(text)
|
|
}
|
|
|
|
fn prefix_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
let (text, pfx) = ws(opt(prefix_op))(text)?;
|
|
let (text, result) = call_expr(text)?;
|
|
match pfx {
|
|
None => Ok((text, result)),
|
|
Some(pfx) => {
|
|
let exp = Expression { id: ItemId::new(0), kind: result, type_anno: None };
|
|
Ok((text, ExpressionKind::PrefixExp(pfx, Box::new(exp))))
|
|
}
|
|
}
|
|
}
|
|
|
|
// this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
|
|
fn precedence_expr(text: &str) -> ParseResult<ExpressionKind> {
|
|
fn inner_precedence_expr(input: &str, precedence: i32) -> ParseResult<ExpressionKind> {
|
|
let (mut outer_rest, mut lhs) = prefix_expr(input)?;
|
|
loop {
|
|
let (rest, _) = space0(outer_rest)?;
|
|
let (rest, maybe_binop) = opt(binop)(rest)?;
|
|
let (new_precedence, binop) = match maybe_binop {
|
|
Some(binop) => (binop.precedence(), binop),
|
|
None => break,
|
|
};
|
|
|
|
if precedence >= new_precedence {
|
|
break;
|
|
}
|
|
let (rest, _) = space0(rest)?;
|
|
let (rest, rhs) = inner_precedence_expr(rest, new_precedence)?;
|
|
outer_rest = rest;
|
|
lhs = ExpressionKind::BinExp(binop,
|
|
bx!(Expression::new(ItemId::new(0), lhs)),
|
|
bx!(Expression::new(ItemId::new(0), rhs))
|
|
);
|
|
}
|
|
Ok((outer_rest, lhs))
|
|
}
|
|
context("Precedence expression",
|
|
|input| inner_precedence_expr(input, BinOp::min_precedence())
|
|
)(text)
|
|
}
|
|
|
|
fn expression_kind(text: &str) -> ParseResult<ExpressionKind> {
|
|
context("Expression kind", precedence_expr)(text)
|
|
}
|
|
|
|
fn type_anno(text: &str) -> ParseResult<TypeIdentifier> {
|
|
use nom::character::complete::char;
|
|
preceded(ws(char(':')), ws(type_name))(text)
|
|
}
|
|
|
|
fn type_name(text: &str) -> ParseResult<TypeIdentifier> {
|
|
//TODO incomplete
|
|
let (text, name) = identifier(text)?;
|
|
let id = TypeIdentifier::Singleton(TypeSingletonName { name, params: vec![] });
|
|
Ok((text, id))
|
|
}
|
|
|
|
fn expression(text: &str) -> ParseResult<Expression> {
|
|
let (rest, (kind, type_anno)) = pair(expression_kind, opt(type_anno))(text)?;
|
|
let expr = Expression { id: ItemId::new(0), kind, type_anno };
|
|
Ok((rest, expr))
|
|
}
|
|
|
|
pub fn perform_parsing(input: &str) -> Result<String, String> {
|
|
let output = match expression(input) {
|
|
Ok((rest, ast)) => format!("{:?} (rest: {})", ast, rest),
|
|
Err(nom::Err::Incomplete(needed)) => format!("Incomplete: {:?}" ,needed),
|
|
Err(nom::Err::Error(verbose_error) | nom::Err::Failure(verbose_error)) => {
|
|
format!("Verbose Error: ` {:?} `", verbose_error)
|
|
//nom::error::convert_error(input, verbose_error)
|
|
}
|
|
};
|
|
|
|
Ok(output)
|
|
}
|