//! # Parsing
//! This module is where the recursive-descent parsing methods live.
//!
//!
//! # Schala EBNF Grammar
//! This document is the authoritative grammar of Schala, represented in something approximating
//! Extended Backus-Naur form. Terminal productions are in "double quotes", or UPPERCASE
//! if they represent a class of tokens rather than a specific string, or are otherwise
//! unrepresentable in ASCII.
//!
//! ## Top level structure
//!
//! ```text
//! program := (statement delimiter)* EOF
//! delimiter := NEWLINE | ";"
//! statement := expression | declaration | import
//! block := "{" (statement delimiter)* "}"
//! declaration := type_declaration | func_declaration | binding_declaration | interface_declaration | impl_declaration
//! ```
//! ## Declarations
//!
//! ### Types
//! ```text
//! type_declaration := "type" type_declaration_body
//! type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body
//! type_alias := IDENTIFIER "=" type_name
//! type_body := variant_specifier ("|" variant_specifier)*
//! variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")"
//! typed_identifier_list := typed_identifier*
//! typed_identifier := IDENTIFIER type_anno
//! ```
//! ### Functions
//!
//! ```text
//! func_declaration := func_signature func_body
//! func_body := ε | nonempty_func_body
//! nonempty_func_body := "{" (statement delimiter)* "}"
//! func_signature := "fn" func_name formal_param_list type_anno+
//! func_name := IDENTIFIER | operator
//! formal_param_list := "(" (formal_param ",")* ")"
//! formal_param := IDENTIFIER type_anno+
//! ```
//!
//! ### Variable bindings
//! ```text
//! binding_declaration := "let" "mut"? IDENTIFIER "=" expression
//! ```
//!
//! ### Interfaces
//!
//! ```text
//! interface_declaration := "interface" type_singleton_name signature_block
//! impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block
//!
//! decl_block := "{" (func_declaration)* "}"
//! signature_block := "{" (func_signature)* "}"
//! ```
//!
//! ### Type Annotations
//!
//! ```text
//! type_anno := ":" type_name
//! type_name := type_singleton_name | "(" type_names ")"
//! type_names := ε | type_name (, type_name)*
//! type_singleton_name := IDENTIFIER (type_params)*
//! type_params := "<" type_name (, type_name)* ">"
//! ```
//!
//! ## Expressions
//! ```text
//! expression := precedence_expr type_anno+
//! precedence_expr := prefix_expr
//! prefix_expr := prefix_op call_expr
//! prefix_op := "+" | "-" | "!" | "~"
//! call_expr := index_expr ( "(" invocation_list ")" )* | ε
//! invocation_list := invocation_argument ("," invocation_argument)* | ε
//! invocation_argument := expression | IDENTIFIER "=" expression | "_"
//! index_expr := primary ( "[" (expression ("," expression)* | ε) "]" )*
//! primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
//! expr_or_block := "{" (statement delimiter)* "}" | expression
//! ```
//!
//! ### Primary expressions
//!
//! ```text
//! list_expr := "[" (expression, ",")* "]"
//! lambda_expr := "\\" lambda_param_list type_anno+ nonempty_func_body
//! lambda_param_list := formal_param_list | formal_param
//! paren_expr := "(" paren_inner ")"
//! paren_inner := (expression ",")*
//! identifier_expr := qualified_identifier | named_struct
//! qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*
//! ```
//!
//! ## Literals
//! ```text
//! literal := "true" | "false" | number_literal | STR_LITERAL
//! named_struct := qualified_identifier record_block
//! record_block := "{" (record_entry, ",")* "}" //TODO support anonymous structs, update syntax
//! record_entry := IDENTIFIER ":" expression
//! anonymous_struct := TODO
//! number_literal := int_literal | float_literal
//! int_literal := ("0x" | "0b") digits
//! float_literal := digits ("." digits)
//! digits := (digit_group underscore*)+
//!
//! digit_group := DIGIT+
//! ```
//!
//! ### Patterns
//! ```text
//! pattern := "(" (pattern, ",")* ")" | simple_pattern
//! simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern
//! pattern_literal := "true" | "false" | signed_number_literal | STR_LITERAL | qualified_identifier
//! signed_number_literal := "-"? number_literal
//! record_pattern := qualified_identifier "{" (record_pattern_entry, ",")* "}"
//! record_pattern_entry := IDENTIFIER | IDENTIFIER ":" pattern
//! tuple_struct_pattern := qualified_identifier "(" (pattern, ",")* ")"
//! ```
//!
//! ### If-expressions
//! ```text
//! if_expr := "if" discriminator ("then" conditional | "is" simple_pattern_match | guard_block)
//! discriminator := precedence_expr (operator)+
//! conditional := expr_or_block else_clause
//! simple_pattern_match := pattern "then" conditional
//! else_clause := ε | "else" expr_or_block
//! guard_block := "{" (guard_arm, ",")* "}"
//! guard_arm := guard "->" expr_or_block
//! guard := "is" pattern | (operator)+ precedence_expr
//! ```
//!
//! ### While expressions
//! ```text
//! while_expr := "while" while_cond "{" (statement delimiter)* "}"
//! while_cond := ε | expression | expression "is" pattern //TODO maybe is-expressions should be primary
//! ```
//!
//! //TODO this implies there must be at least one enumerator, which the parser doesn't support right
//! //this second, and maybe should fail later anyway
//! ### For-expressions
//! ```text
//! for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body
//! for_expr_body := "return" expression | "{" (statement delimiter)* "}"
//! enumerators := enumerator ("," enumerators)*
//! enumerator := identifier "<-" expression | identifier "=" expression //TODO add guards, etc.
//! ```
//! ## Imports
//! ```text
//! import := 'import' IDENTIFIER (:: IDENTIFIER)* import_suffix
//! import_suffix := ε | '::{' IDENTIFIER (, IDENTIFIER)* '}' | '*' //TODO add qualified, exclusions, etc.
//!
``` mod test; use std::rc::Rc; use std::str::FromStr; use crate::tokenizing::*; use crate::tokenizing::Kw::*; use crate::tokenizing::TokenKind::*; use crate::ast::*; /// Represents a parsing error #[derive(Debug)] pub struct ParseError { pub msg: String, pub token: Token } impl ParseError { fn new_with_token(msg: M, token: Token) -> ParseResult where M: Into { Err(ParseError { msg: msg.into(), token }) } } /// Represents either a successful parsing result or a ParseError pub type ParseResult = Result; #[derive(Debug)] pub struct ParseRecord { production_name: String, next_token: String, level: u32, } /// Main data structure for doing parsing. pub struct Parser { token_handler: TokenHandler, parse_record: Vec, parse_level: u32, restrictions: ParserRestrictions, id_store: ItemIdStore, } struct ParserRestrictions { no_struct_literal: bool } struct TokenHandler { tokens: Vec, idx: usize, end_of_file: (usize, usize), } impl TokenHandler { fn new(tokens: Vec) -> TokenHandler { let end_of_file = match tokens.last() { None => (0, 0), Some(t) => (t.line_num, t.char_num) }; TokenHandler { idx: 0, tokens, end_of_file } } fn peek_kind(&mut self) -> TokenKind { self.peek().kind } fn peek_kind_n(&mut self, n: usize) -> TokenKind { self.peek_n(n).kind } fn peek(&mut self) -> Token { self.tokens.get(self.idx).map(|t: &Token| { t.clone()}).unwrap_or(Token { kind: TokenKind::EOF, line_num: self.end_of_file.0, char_num: self.end_of_file.1}) } /// calling peek_n(0) is the same thing as peek() fn peek_n(&mut self, n: usize) -> Token { self.tokens.get(self.idx + n).map(|t: &Token| { t.clone()}).unwrap_or(Token { kind: TokenKind::EOF, line_num: self.end_of_file.0, char_num: self.end_of_file.1}) } fn next(&mut self) -> Token { self.idx += 1; self.tokens.get(self.idx - 1).map(|t: &Token| { t.clone() }).unwrap_or(Token { kind: TokenKind::EOF, line_num: self.end_of_file.0, char_num: self.end_of_file.1}) } } impl Parser { /// Create a new parser initialized with some tokens. 
pub fn new(initial_input: Vec) -> Parser { Parser { token_handler: TokenHandler::new(initial_input), parse_record: vec![], parse_level: 0, restrictions: ParserRestrictions { no_struct_literal: false }, id_store: ItemIdStore::new(), } } /// Parse all loaded tokens up to this point. pub fn parse(&mut self) -> ParseResult { self.program() } /* pub fn parse_with_new_tokens(&mut self, new_tokens: Vec) -> ParseResult { } */ pub fn format_parse_trace(&self) -> String { let mut buf = String::new(); for r in self.parse_record.iter() { let mut indent = String::new(); for _ in 0..r.level { indent.push(' '); } buf.push_str(&format!("{}Production `{}`, token: {}\n", indent, r.production_name, r.next_token)) } buf } } macro_rules! print_token_pattern { ($tokenpattern:pat) => { stringify!($tokenpattern) } } macro_rules! expect { ($self:expr, $token_kind:pat) => { expect!($self, $token_kind if true) }; ($self:expr, $expected_kind:pat if $cond:expr) => { { let tok = $self.token_handler.peek(); match tok.get_kind() { $expected_kind if $cond => $self.token_handler.next(), actual_kind => { let msg = format!("Expected {}, got {:?}", print_token_pattern!($expected_kind), actual_kind); return ParseError::new_with_token(msg, tok); } } } } } macro_rules! 
delimited { ($self:expr, $start:pat, $parse_fn:ident, $( $delim:pat )|+, $end:pat, nonstrict) => { delimited!($self, $start, $parse_fn, $( $delim )|*, $end, false) }; ($self:expr, $start:pat, $parse_fn:ident, $( $delim:pat )|+, $end:pat) => { delimited!($self, $start, $parse_fn, $( $delim )|*, $end, true) }; ($self:expr, $start:pat, $parse_fn:ident, $( $delim:pat )|+, $end:pat, $strictness:expr) => { { expect!($self, $start); let mut acc = vec![]; loop { let peek = $self.token_handler.peek(); match peek.get_kind() { $end | EOF => break, _ => (), } if !$strictness { match peek.get_kind() { $( $delim )|* => { $self.token_handler.next(); continue }, _ => () } } acc.push($self.$parse_fn()?); match $self.token_handler.peek().get_kind() { $( $delim )|* => { $self.token_handler.next(); continue }, _ if $strictness => break, _ => continue, }; } expect!($self, $end); acc } }; } impl Parser { /// `program := (statement delimiter)* EOF` /// `delimiter := NEWLINE | ';'` #[recursive_descent_method] fn program(&mut self) -> ParseResult { let mut statements = Vec::new(); loop { match self.token_handler.peek().get_kind() { EOF => break, Newline | Semicolon => { self.token_handler.next(); continue; }, _ => statements.push( self.statement()? 
), } } Ok(AST { id: self.id_store.fresh(), statements }) } /// `statement := expression | declaration` #[recursive_descent_method] fn statement(&mut self) -> ParseResult { //TODO handle error recovery here let kind = match self.token_handler.peek().get_kind() { Keyword(Type) => self.type_declaration().map(|decl| { StatementKind::Declaration(decl) }), Keyword(Func)=> self.func_declaration().map(|func| { StatementKind::Declaration(func) }), Keyword(Let) => self.binding_declaration().map(|decl| StatementKind::Declaration(decl)), Keyword(Interface) => self.interface_declaration().map(|decl| StatementKind::Declaration(decl)), Keyword(Impl) => self.impl_declaration().map(|decl| StatementKind::Declaration(decl)), Keyword(Import) => self.import_declaration().map(|spec| StatementKind::Import(spec)), _ => self.expression().map(|expr| { StatementKind::Expression(expr.into()) } ), }?; Ok(Statement { kind, id: self.id_store.fresh() }) } #[recursive_descent_method] fn type_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Type)); self.type_declaration_body() } #[recursive_descent_method] fn type_declaration_body(&mut self) -> ParseResult { if let Keyword(Alias) = self.token_handler.peek_kind() { self.type_alias() } else { let mutable = if let Keyword(Mut) = self.token_handler.peek_kind() { self.token_handler.next(); true } else { false }; let name = self.type_singleton_name()?; expect!(self, Equals); let body = self.type_body()?; Ok(Declaration::TypeDecl { name, body, mutable}) } } #[recursive_descent_method] fn type_alias(&mut self) -> ParseResult { expect!(self, Keyword(Alias)); let alias = self.identifier()?; expect!(self, Equals); let original = self.identifier()?; Ok(Declaration::TypeAlias(alias, original)) } #[recursive_descent_method] fn type_body(&mut self) -> ParseResult { let mut variants = Vec::new(); variants.push(self.variant_specifier()?); loop { if let Pipe = self.token_handler.peek_kind() { self.token_handler.next(); 
variants.push(self.variant_specifier()?);
        } else {
            break;
        }
    }
    Ok(TypeBody(variants))
}

/// `variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")"`
///
/// Members of both the tuple and the record form are comma-delimited.
#[recursive_descent_method]
fn variant_specifier(&mut self) -> ParseResult {
    use self::Variant::*;

    let name = self.identifier()?;
    match self.token_handler.peek_kind() {
        LParen => {
            let tuple_members = delimited!(self, LParen, type_name, Comma, RParen);
            Ok(TupleStruct(name, tuple_members))
        },
        LCurlyBrace => {
            let typed_identifier_list = delimited!(self, LCurlyBrace, typed_identifier, Comma, RCurlyBrace);
            Ok(Record {name, members: typed_identifier_list })
        },
        _ => Ok(UnitStruct(name))
    }
}

/// Parses `IDENTIFIER ":" type_name`; the annotation is mandatory here.
#[recursive_descent_method]
fn typed_identifier(&mut self) -> ParseResult<(Rc, TypeIdentifier)> {
    let identifier = self.identifier()?;
    expect!(self, Colon);
    let type_name = self.type_name()?;
    Ok((identifier, type_name))
}

/// `func_declaration := func_signature func_body`
///
/// Yields `FuncDecl` when a `{` body follows the signature, otherwise a bare `FuncSig`.
#[recursive_descent_method]
fn func_declaration(&mut self) -> ParseResult {
    let signature = self.func_signature()?;
    if let LCurlyBrace = self.token_handler.peek_kind() {
        let statements = self.nonempty_func_body()?;
        Ok(Declaration::FuncDecl(signature, statements))
    } else {
        Ok(Declaration::FuncSig(signature))
    }
}

/// Parses the function keyword, a name (identifier or operator), the parameter list and an
/// optional `:` return-type annotation. `operator` records whether the name was an operator.
#[recursive_descent_method]
fn func_signature(&mut self) -> ParseResult {
    expect!(self, Keyword(Func));
    let (name, operator) = match self.token_handler.peek_kind() {
        Operator(s) => {
            // `s` is already owned (moved out of the peeked kind), so no clone is needed
            self.token_handler.next();
            (s, true)
        },
        _ => (self.identifier()?, false)
    };
    let params = self.formal_param_list()?;
    let type_anno = match self.token_handler.peek_kind() {
        Colon => Some(self.type_anno()?),
        _ => None,
    };
    Ok(Signature { name, operator, params, type_anno })
}

/// `nonempty_func_body := "{" (statement delimiter)* "}"`
#[recursive_descent_method]
fn nonempty_func_body(&mut self) -> ParseResult> {
    let statements = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict);
    Ok(statements)
}

/// `formal_param_list := "(" (formal_param ",")* ")"`
#[recursive_descent_method]
fn formal_param_list(&mut self) -> ParseResult> {
    Ok(delimited!(self, LParen, formal_param, Comma, RParen))
}

//TODO needs to support default values
#[recursive_descent_method]
fn formal_param(&mut self) -> ParseResult { let name = self.identifier()?; let anno = match self.token_handler.peek_kind() { Colon => Some(self.type_anno()?), _ => None }; let default = match self.token_handler.peek_kind() { Equals => { self.token_handler.next(); Some(self.expression()?.into()) }, _ => None }; Ok(FormalParam { name, anno, default }) } #[recursive_descent_method] fn binding_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Kw::Let)); let constant = match self.token_handler.peek_kind() { Keyword(Kw::Mut) => { self.token_handler.next(); false } _ => true }; let name = self.identifier()?; let type_anno = if let Colon = self.token_handler.peek_kind() { Some(self.type_anno()?) } else { None }; expect!(self, Equals); let expr = self.expression()?.into(); Ok(Declaration::Binding { name, constant, type_anno, expr }) } #[recursive_descent_method] fn interface_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Interface)); let name = self.identifier()?; let signatures = self.signature_block()?; Ok(Declaration::Interface { name, signatures }) } #[recursive_descent_method] fn signature_block(&mut self) -> ParseResult> { Ok(delimited!(self, LCurlyBrace, func_signature, Newline | Semicolon, RCurlyBrace, nonstrict)) } #[recursive_descent_method] fn impl_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Impl)); let first = self.type_singleton_name()?; let second = if let Keyword(For) = self.token_handler.peek_kind() { self.token_handler.next(); Some(self.type_name()?) 
} else { None }; let block = self.decl_block()?; Ok(match (first, second) { (interface_name, Some(type_name)) => Declaration::Impl { type_name, interface_name: Some(interface_name), block }, (type_singleton_name, None) => Declaration::Impl { type_name: TypeIdentifier::Singleton(type_singleton_name), interface_name: None, block } }) } #[recursive_descent_method] fn decl_block(&mut self) -> ParseResult> { Ok(delimited!(self, LCurlyBrace, func_declaration, Newline | Semicolon, RCurlyBrace, nonstrict)) } #[recursive_descent_method] fn expression(&mut self) -> ParseResult { let mut expr_body = self.precedence_expr(BinOp::min_precedence())?; let type_anno = match self.token_handler.peek_kind() { Colon => Some(self.type_anno()?), _ => None }; if let Some(_) = expr_body.type_anno { return ParseError::new_with_token("Bad parse state encountered", self.token_handler.peek()); } expr_body.type_anno = type_anno; Ok(expr_body) } #[recursive_descent_method] fn type_anno(&mut self) -> ParseResult { expect!(self, Colon); self.type_name() } #[recursive_descent_method] fn type_name(&mut self) -> ParseResult { use self::TypeIdentifier::*; Ok(match self.token_handler.peek_kind() { LParen => Tuple(delimited!(self, LParen, type_name, Comma, RParen)), _ => Singleton(self.type_singleton_name()?), }) } #[recursive_descent_method] fn type_singleton_name(&mut self) -> ParseResult { Ok(TypeSingletonName { name: self.identifier()?, params: match self.token_handler.peek_kind() { LAngleBracket => delimited!(self, LAngleBracket, type_name, Comma, RAngleBracket), _ => vec![], } }) } // this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ fn precedence_expr(&mut self, precedence: i32) -> ParseResult { let record = ParseRecord { production_name: "precedence_expr".to_string(), next_token: format!("{}", self.token_handler.peek().to_string_with_metadata()), level: self.parse_level, }; self.parse_level += 1; 
self.parse_record.push(record);

    // Parse the left operand, then keep folding in binary operators for as long as the next
    // operator's precedence value is strictly greater than `precedence`.
    let mut lhs = self.prefix_expr()?;
    loop {
        let new_precedence = match BinOp::get_precedence_from_token(&self.token_handler.peek_kind()) {
            Some(p) => p,
            // the next token has no binary-operator precedence: the expression ends here
            None => break,
        };

        if precedence >= new_precedence {
            break;
        }
        let next_tok = self.token_handler.next();
        let operation = match BinOp::from_sigil_token(&next_tok.kind) {
            Some(sigil) => sigil,
            // NOTE(review): assumes every token that has a precedence also maps to a `BinOp` — confirm
            None => unreachable!()
        };
        let rhs = self.precedence_expr(new_precedence)?;
        lhs = Expression::new(self.id_store.fresh(), ExpressionKind::BinExp(operation, bx!(lhs.into()), bx!(rhs.into())));
    }
    // NOTE: skipped when one of the `?` above returns early, so `parse_level` stays incremented
    // after a parse error.
    self.parse_level -= 1;
    Ok(lhs)
}

/// Parses an optional prefix operator followed by its operand; without a prefix operator this
/// is just `call_expr`.
///
/// NOTE(review): the module grammar says `prefix_expr := prefix_op call_expr`, but the operand
/// of a prefix operator is parsed with `primary()` here — confirm which one is intended.
#[recursive_descent_method]
fn prefix_expr(&mut self) -> ParseResult {
    match self.token_handler.peek_kind() {
        Operator(ref op) if PrefixOp::is_prefix(&*op) => {
            // re-read the same token by value to take ownership of the operator text
            let sigil = match self.token_handler.next().kind {
                Operator(op) => op,
                _ => unreachable!(),
            };
            let expr = self.primary()?;
            // NOTE(review): presumably cannot fail because `is_prefix` accepted this sigil — confirm
            let prefix_op = PrefixOp::from_str(sigil.as_str()).unwrap();
            Ok(Expression::new(
                self.id_store.fresh(),
                ExpressionKind::PrefixExp(prefix_op, bx!(expr.into()))
            ))
        },
        _ => self.call_expr()
    }
}

/// `call_expr := index_expr ( "(" invocation_list ")" )*`
///
/// Each parenthesised argument list wraps the expression built so far in another `Call`.
#[recursive_descent_method]
fn call_expr(&mut self) -> ParseResult {
    let mut expr = self.index_expr()?;
    while let LParen = self.token_handler.peek_kind() {
        let arguments = delimited!(self, LParen, invocation_argument, Comma, RParen);
        let arguments = arguments.into_iter().collect();
        expr = Expression::new(self.id_store.fresh(), ExpressionKind::Call { f: bx!(expr.into()), arguments }); //TODO no type anno is incorrect
    }

    Ok(expr)
}

/// `invocation_argument := expression | IDENTIFIER "=" expression | "_"`
#[recursive_descent_method]
fn invocation_argument(&mut self) -> ParseResult {
    Ok(match self.token_handler.peek_kind() {
        Underscore => {
            self.token_handler.next();
            InvocationArgument::Ignored
        },
        Identifier(s) => {
            // one token of lookahead separates `name = expr` from an expression that merely
            // starts with an identifier
            match self.token_handler.peek_kind_n(1) {
                Equals => {
                    // consume the identifier and the `=`
                    self.token_handler.next();
                    self.token_handler.next();
                    let expr = self.expression()?.into();
                    InvocationArgument::Keyword { name: s.clone(), expr }
                },
                _ => {
                    let expr = self.expression()?;
InvocationArgument::Positional(expr.into())
                }
            }
        },
        _ => InvocationArgument::Positional(self.expression()?.into())
    })
}

/// Parses a primary expression optionally followed by one bracketed, comma-delimited list of
/// index expressions.
///
/// NOTE(review): the module grammar allows repeated `[...]` groups, but only a single group is
/// consumed here — confirm. The indexee is rebuilt from `primary.kind` with a fresh id.
#[recursive_descent_method]
fn index_expr(&mut self) -> ParseResult {
    let primary = self.primary()?;
    Ok(if let LSquareBracket = self.token_handler.peek_kind() {
        let indexers = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket)
            .into_iter().map(|ex| ex.into()).collect();
        Expression::new(self.id_store.fresh(), ExpressionKind::Index {
            indexee: bx!(Expression::new(self.id_store.fresh(), primary.kind).into()),
            indexers,
        })
    } else {
        primary
    })
}

/// Dispatches on the next token to the matching primary-expression parser; anything not
/// recognised here is tried as a literal.
#[recursive_descent_method]
fn primary(&mut self) -> ParseResult {
    match self.token_handler.peek_kind() {
        LCurlyBrace => self.curly_brace_expr(),
        Backslash => self.lambda_expr(),
        LParen => self.paren_expr(),
        LSquareBracket => self.list_expr(),
        Keyword(Kw::If) => self.if_expr(),
        Keyword(Kw::For) => self.for_expr(),
        Keyword(Kw::While) => self.while_expr(),
        Identifier(_) => self.identifier_expr(),
        _ => self.literal(),
    }
}

/// `list_expr := "[" (expression, ",")* "]"`
#[recursive_descent_method]
fn list_expr(&mut self) -> ParseResult {
    let exprs = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket)
        .into_iter().map(|ex| ex.into()).collect();
    Ok(Expression::new(self.id_store.fresh(), ExpressionKind::ListLiteral(exprs)))
}

/// Placeholder: an expression starting with `{` always fails with "Not implemented".
#[recursive_descent_method]
fn curly_brace_expr(&mut self) -> ParseResult {
    ParseError::new_with_token("Not implemented", self.token_handler.peek())
}

/// Parses a backslash, a parameter list, an optional `:` type annotation and a braced body.
#[recursive_descent_method]
fn lambda_expr(&mut self) -> ParseResult {
    expect!(self, Backslash);
    let params = self.lambda_param_list()?;
    let type_anno = match self.token_handler.peek_kind() {
        Colon => Some(self.type_anno()?),
        _ => None,
    };
    let body = self.nonempty_func_body()?;
    Ok(Expression::new(self.id_store.fresh(), ExpressionKind::Lambda { params, type_anno, body })) //TODO need to handle types somehow
}

/// `lambda_param_list := formal_param_list | formal_param`
#[recursive_descent_method]
fn lambda_param_list(&mut self) -> ParseResult> {
    if let LParen = self.token_handler.peek_kind() {
        self.formal_param_list()
    } else
{
        let single_param = self.formal_param()?;
        Ok(vec![single_param])
    }
}

/// Parses a parenthesised, comma-delimited expression list: exactly one element yields that
/// element itself, any other count yields a tuple literal. Struct literals are re-enabled
/// inside the parentheses and the previous restriction is put back afterwards.
#[recursive_descent_method]
fn paren_expr(&mut self) -> ParseResult {
    use self::ExpressionKind::*;
    let old_struct_value = self.restrictions.no_struct_literal;
    self.restrictions.no_struct_literal = false;
    let mut inner = delimited!(self, LParen, expression, Comma, RParen);
    let output = if inner.len() == 1 {
        Ok(inner.pop().unwrap())
    } else {
        // covers both the empty tuple and tuples of two or more elements
        let inner: Vec = inner.into_iter().map(|ex| ex.into()).collect();
        Ok(Expression::new(self.id_store.fresh(), TupleLiteral(inner)))
    };
    self.restrictions.no_struct_literal = old_struct_value;
    output
}

/// `identifier_expr := qualified_identifier | named_struct`
///
/// A `{` after the name starts a struct literal unless `no_struct_literal` is set.
#[recursive_descent_method]
fn identifier_expr(&mut self) -> ParseResult {
    use self::ExpressionKind::*;
    let components = self.qualified_identifier()?;
    let qualified_identifier = QualifiedName { id: self.id_store.fresh(), components };
    let struct_literal_allowed = !self.restrictions.no_struct_literal;
    let kind = match self.token_handler.peek_kind() {
        LCurlyBrace if struct_literal_allowed => {
            let fields = self.record_block()?;
            NamedStruct { name: qualified_identifier, fields }
        },
        _ => Value(qualified_identifier),
    };
    Ok(Expression::new(self.id_store.fresh(), kind))
}

/// `qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*`, where `::` arrives as two
/// consecutive `Colon` tokens.
#[recursive_descent_method]
fn qualified_identifier(&mut self) -> ParseResult>> {
    let mut components = vec![self.identifier()?];
    while let (Colon, Colon) = (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1)) {
        self.token_handler.next();
        self.token_handler.next();
        components.push(self.identifier()?);
    }
    Ok(components)
}

/// Parses a braced, comma-delimited list of `record_entry` items.
#[recursive_descent_method]
fn record_block(&mut self) -> ParseResult, Expression)>> {
    let entries = delimited!(self, LCurlyBrace, record_entry, Comma, RCurlyBrace);
    Ok(entries.into_iter().map(|(field, value)| (field, value.into())).collect())
}

#[recursive_descent_method]
fn record_entry(&mut self) -> ParseResult<(Rc, Expression)> {
    let field_name = self.identifier()?;
    expect!(self, Colon);
    let value = self.expression()?;
    Ok((field_name,
value))
}

/// `if_expr := "if" discriminator ("then" conditional | "is" simple_pattern_match | guard_block)`
#[recursive_descent_method]
fn if_expr(&mut self) -> ParseResult {
    expect!(self, Keyword(Kw::If));
    let discriminator = Box::new({
        // Forbid struct literals so that a `{` after the discriminator starts the guard block.
        // Afterwards restore the enclosing restriction (as `paren_expr` does) instead of
        // unconditionally clearing it, so an `if` nested inside another restricted context
        // (e.g. a `while` condition) does not lift the outer restriction.
        let old_struct_value = self.restrictions.no_struct_literal;
        self.restrictions.no_struct_literal = true;
        let x = self.discriminator();
        self.restrictions.no_struct_literal = old_struct_value;
        x?
    });

    let body = Box::new(match self.token_handler.peek_kind() {
        Keyword(Kw::Then) => self.conditional()?,
        Keyword(Kw::Is) => self.simple_pattern_match()? ,
        _ => self.guard_block()?
    });

    Ok(Expression::new(self.id_store.fresh(), ExpressionKind::IfExpression { discriminator, body }))
}

/// Parses the expression an `if` tests, optionally noting a trailing binary operator.
///
/// NOTE(review): the operator is only peeked, not consumed — confirm this is intended, since
/// none of the three continuations tried by `if_expr` starts with an operator token.
#[recursive_descent_method]
fn discriminator(&mut self) -> ParseResult {
    let lhs = self.prefix_expr()?;
    let ref next = self.token_handler.peek_kind();
    Ok(if let Some(op) = BinOp::from_sigil_token(next) {
        Discriminator::BinOp(lhs.into(), op)
    } else {
        Discriminator::Simple(lhs.into())
    })
}

/// Parses `"then" expr_or_block else_clause`.
#[recursive_descent_method]
fn conditional(&mut self) -> ParseResult {
    expect!(self, Keyword(Kw::Then));
    let then_clause = self.expr_or_block()?;
    let else_clause = self.else_clause()?;
    Ok(IfExpressionBody::SimpleConditional(then_clause, else_clause))
}

/// Parses `"is" pattern "then" expr_or_block else_clause`.
#[recursive_descent_method]
fn simple_pattern_match(&mut self) -> ParseResult {
    expect!(self, Keyword(Kw::Is));
    let pat = self.pattern()?;
    expect!(self, Keyword(Kw::Then));
    let then_clause = self.expr_or_block()?;
    let else_clause = self.else_clause()?;
    Ok(IfExpressionBody::SimplePatternMatch(pat, then_clause, else_clause))
}

/// `else_clause := ε | "else" expr_or_block`
#[recursive_descent_method]
fn else_clause(&mut self) -> ParseResult> {
    Ok(if let Keyword(Kw::Else) = self.token_handler.peek_kind() {
        self.token_handler.next();
        Some(self.expr_or_block()?)
    } else {
        None
    })
}

/// Parses a braced list of guard arms. Arms are separated by commas; newlines and semicolons
/// around arms are skipped, and the comma after the last arm is optional.
#[recursive_descent_method]
fn guard_block(&mut self) -> ParseResult {
    //TODO - delimited! isn't sophisticated enough to do this
    //let guards = delimited!(self, LCurlyBrace, guard_arm, Comma, RCurlyBrace);
    expect!(self, LCurlyBrace);

    let mut guards = vec![];
    loop {
        match self.token_handler.peek_kind() {
            RCurlyBrace | EOF => break,
            Semicolon | Newline => { self.token_handler.next(); continue},
            _ => {
                let guard_arm = self.guard_arm()?;
                guards.push(guard_arm);
                // skip blank separators between the arm and its comma / the closing brace
                loop {
                    match self.token_handler.peek_kind() {
                        Semicolon | Newline => { self.token_handler.next(); continue; },
                        _ => break,
                    }
                }
                if let RCurlyBrace = self.token_handler.peek_kind() {
                    break;
                }
                expect!(self, Comma);
            }
        }
    }
    expect!(self, RCurlyBrace);
    Ok(IfExpressionBody::GuardList(guards))
}

/// `guard_arm := guard "->" expr_or_block`
#[recursive_descent_method]
fn guard_arm(&mut self) -> ParseResult {
    let guard = self.guard()?;
    expect!(self, Operator(ref c) if **c == "->");
    let body = self.expr_or_block()?;
    Ok(GuardArm { guard, body })
}

/// Parses a guard: `"is" pattern`, a binary operator followed by its right-hand operand, or a
/// bare operand with no operator.
#[recursive_descent_method]
fn guard(&mut self) -> ParseResult {
    Ok(match self.token_handler.peek_kind() {
        Keyword(Kw::Is) => {
            self.token_handler.next();
            let pat = self.pattern()?;
            Guard::Pat(pat)
        },
        ref tok if BinOp::from_sigil_token(tok).is_some() => {
            // the guard above already established that this token converts to a `BinOp`
            let op = BinOp::from_sigil_token(&self.token_handler.next().kind).unwrap();
            let precedence = op.get_precedence();
            let Expression { kind, .. } = self.precedence_expr(precedence)?;
            Guard::HalfExpr(HalfExpr { op: Some(op), expr: kind })
        },
        _ => {
            //TODO - I think there's a better way to do this involving the precedence of ->
            let Expression { kind, ..
} = self.prefix_expr()?;
            Guard::HalfExpr(HalfExpr { op: None, expr: kind })
        }
    })
}

/// `pattern := "(" (pattern, ",")* ")" | simple_pattern`
#[recursive_descent_method]
fn pattern(&mut self) -> ParseResult {
    if let LParen = self.token_handler.peek_kind() {
        let tuple_pattern_variants = delimited!(self, LParen, pattern, Comma, RParen);
        Ok(Pattern::TuplePattern(tuple_pattern_variants))
    } else {
        self.simple_pattern()
    }
}

/// Parses a non-tuple pattern. An identifier-led pattern is a qualified name followed by a
/// `{...}` record pattern, a `(...)` tuple-struct pattern, or nothing (variable or name);
/// every other token is handed to `pattern_literal`.
#[recursive_descent_method]
fn simple_pattern(&mut self) -> ParseResult {
    Ok(match self.token_handler.peek_kind() {
        Identifier(_) => {
            let components = self.qualified_identifier()?;
            let qualified_identifier = QualifiedName { id: self.id_store.fresh(), components };
            match self.token_handler.peek_kind() {
                LCurlyBrace => {
                    let members = delimited!(self, LCurlyBrace, record_pattern_entry, Comma, RCurlyBrace);
                    Pattern::Record(qualified_identifier, members)
                },
                LParen => {
                    let members = delimited!(self, LParen, pattern, Comma, RParen);
                    Pattern::TupleStruct(qualified_identifier, members)
                },
                _ => {
                    Pattern::VarOrName(qualified_identifier)
                },
            }
        },
        _ => self.pattern_literal()?
})
}

/// Parses a literal pattern: `true`, `false`, a string literal, an optionally negated number
/// literal, or `_` (ignored). Any other token is reported as an invalid pattern.
#[recursive_descent_method]
fn pattern_literal(&mut self) -> ParseResult {
    let tok = self.token_handler.peek();
    Ok(match tok.get_kind() {
        Keyword(Kw::True) => {
            self.token_handler.next();
            Pattern::Literal(PatternLiteral::BoolPattern(true))
        },
        Keyword(Kw::False) => {
            self.token_handler.next();
            Pattern::Literal(PatternLiteral::BoolPattern(false))
        },
        StrLiteral(s) => {
            self.token_handler.next();
            Pattern::Literal(PatternLiteral::StringPattern(s))
        },
        // tokens that can start a number; `signed_number_literal` consumes them itself
        DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.signed_number_literal()?,
        Operator(ref op) if **op == "-" => self.signed_number_literal()?,
        Underscore => {
            self.token_handler.next();
            Pattern::Ignored
        },
        other => return ParseError::new_with_token(format!("{:?} is not a valid Pattern", other), tok)
    })
}

#[recursive_descent_method]
fn
signed_number_literal(&mut self) -> ParseResult {
    // `signed_number_literal := "-"? number_literal`
    let neg = match self.token_handler.peek_kind() {
        Operator(ref op) if **op == "-" => {
            self.token_handler.next();
            true
        },
        _ => false
    };
    let Expression { kind, .. } = self.number_literal()?;
    Ok(Pattern::Literal(PatternLiteral::NumPattern { neg, num: kind }))
}

/// `record_pattern_entry := IDENTIFIER | IDENTIFIER ":" pattern`
///
/// The shorthand form (no `:`) pairs the field name with a string pattern of the same name.
#[recursive_descent_method]
fn record_pattern_entry(&mut self) -> ParseResult<(Rc, Pattern)> {
    let name = self.identifier()?;
    Ok(match self.token_handler.peek_kind() {
        Colon => {
            expect!(self, Colon);
            let pat = self.pattern()?;
            (name, pat)
        },
        // tuple fields are evaluated left to right, so the second use can take ownership
        _ => (name.clone(), Pattern::Literal(PatternLiteral::StringPattern(name)))
    })
}

/// `block := "{" (statement delimiter)* "}"`
#[recursive_descent_method]
fn block(&mut self) -> ParseResult {
    let block = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict);
    Ok(block)
}

/// Parses either a braced block or a single expression, which is wrapped into a one-statement
/// block.
#[recursive_descent_method]
fn expr_or_block(&mut self) -> ParseResult {
    match self.token_handler.peek_kind() {
        LCurlyBrace => self.block(),
        _ => {
            let expr = self.expression()?;
            let s = Statement { id: self.id_store.fresh(), kind: StatementKind::Expression(expr.into()) };
            Ok(vec![s])
        }
    }
}

/// `while_expr := "while" while_cond "{" (statement delimiter)* "}"`
#[recursive_descent_method]
fn while_expr(&mut self) -> ParseResult {
    use self::ExpressionKind::*;
    expect!(self, Keyword(Kw::While));
    let condition = {
        // Forbid struct literals so the `{` after the condition opens the loop body, then
        // restore the enclosing restriction (as `paren_expr` does) rather than clearing it.
        let old_struct_value = self.restrictions.no_struct_literal;
        self.restrictions.no_struct_literal = true;
        let x = self.while_cond();
        self.restrictions.no_struct_literal = old_struct_value;
        x?.map(|expr| bx!(expr.into()))
    };
    let body = self.block()?;
    Ok(Expression::new(self.id_store.fresh(), WhileExpression {condition, body}))
}

/// Parses the optional loop condition: `None` when the body's `{` follows immediately.
#[recursive_descent_method]
fn while_cond(&mut self) -> ParseResult> {
    Ok(match self.token_handler.peek_kind() {
        LCurlyBrace => None,
        _ => Some(self.expression()?),
    })
}

/// `for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body`
#[recursive_descent_method]
fn for_expr(&mut self) -> ParseResult {
    expect!(self, Keyword(Kw::For));
    let enumerators = if let LCurlyBrace = self.token_handler.peek_kind() {
        delimited!(self, LCurlyBrace, enumerator, Comma | Newline, RCurlyBrace)
    } else {
        let single_enum = {
            // Same save/restore discipline as `paren_expr`: a bare enumerator is followed by
            // the body, so struct literals are disabled only while it is being parsed.
            let old_struct_value = self.restrictions.no_struct_literal;
            self.restrictions.no_struct_literal = true;
            let s = self.enumerator();
            self.restrictions.no_struct_literal = old_struct_value;
            s?
        };
        vec![single_enum]
    };
    let body = Box::new(self.for_expr_body()?);
    Ok(Expression::new(self.id_store.fresh(), ExpressionKind::ForExpression { enumerators, body }))
}

/// Parses `identifier "<-" expression`.
///
/// NOTE(review): the module grammar also lists `identifier "=" expression`, which is not
/// accepted here — confirm.
#[recursive_descent_method]
fn enumerator(&mut self) -> ParseResult {
    let id = self.identifier()?;
    expect!(self, Operator(ref c) if **c == "<-");
    let generator = self.expression()?.into();
    Ok(Enumerator { id, generator })
}

/// `for_expr_body := "return" expression | "{" (statement delimiter)* "}"`
#[recursive_descent_method]
fn for_expr_body(&mut self) -> ParseResult {
    use self::ForBody::*;
    let tok = self.token_handler.peek();
    Ok(match tok.get_kind() {
        LCurlyBrace => {
            let statements = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict);
            StatementBlock(statements)
        },
        Keyword(Kw::Return) => {
            self.token_handler.next();
            MonadicReturn(self.expression()?.into())
        },
        _ => return ParseError::new_with_token("for expressions must end in a block or 'return'", tok),
    })
}

/// Consumes the next token and returns its name if it is an identifier. The token is consumed
/// even when it is not one.
#[recursive_descent_method]
fn identifier(&mut self) -> ParseResult> {
    let tok = self.token_handler.next();
    match tok.get_kind() {
        Identifier(s) => Ok(s),
        p => ParseError::new_with_token(format!("Expected an identifier, got {:?}", p), tok),
    }
}

/// `literal := "true" | "false" | number_literal | STR_LITERAL`
#[recursive_descent_method]
fn literal(&mut self) -> ParseResult {
    use self::ExpressionKind::*;

    let tok = self.token_handler.peek();
    match tok.get_kind() {
        DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.number_literal(),
        Keyword(Kw::True) => {
            self.token_handler.next();
            let id = self.id_store.fresh();
            Ok(Expression::new(id, BoolLiteral(true)))
        },
        Keyword(Kw::False) => {
            self.token_handler.next();
            let id = self.id_store.fresh();
            Ok(Expression::new(id, BoolLiteral(false)))
        },
        StrLiteral(s) => {
            self.token_handler.next();
            let id = self.id_store.fresh();
            Ok(Expression::new(id, StringLiteral(s.clone())))
        }
        e => ParseError::new_with_token(format!("Expected a literal expression, got {:?}", e), tok),
    }
}

#[recursive_descent_method]
fn number_literal(&mut
self) -> ParseResult { match self.token_handler.peek_kind() { HexLiteral(_) | BinNumberSigil => self.int_literal(), _ => self.float_literal(), } } #[recursive_descent_method] fn int_literal(&mut self) -> ParseResult { use self::ExpressionKind::*; let tok = self.token_handler.next(); match tok.get_kind() { BinNumberSigil => { let digits = self.digits()?; let n = parse_binary(digits, tok)?; Ok(Expression::new(self.id_store.fresh(), NatLiteral(n))) }, HexLiteral(text) => { let digits: String = text.chars().filter(|c| c.is_digit(16)).collect(); let n = parse_hex(digits, tok)?; Ok(Expression::new(self.id_store.fresh(), NatLiteral(n))) }, _ => return ParseError::new_with_token("Expected '0x' or '0b'", tok), } } #[recursive_descent_method] fn float_literal(&mut self) -> ParseResult { use self::ExpressionKind::*; let tok = self.token_handler.peek(); let mut digits = self.digits()?; if let Period = self.token_handler.peek_kind() { self.token_handler.next(); digits.push_str("."); digits.push_str(&self.digits()?); match digits.parse::() { Ok(f) => Ok(Expression::new(self.id_store.fresh(), FloatLiteral(f))), Err(e) => ParseError::new_with_token(format!("Float failed to parse with error: {}", e), tok), } } else { match digits.parse::() { Ok(d) => Ok(Expression::new(self.id_store.fresh(), NatLiteral(d))), Err(e) => ParseError::new_with_token(format!("Integer failed to parse with error: {}", e), tok), } } } #[recursive_descent_method] fn digits(&mut self) -> ParseResult { let mut ds = String::new(); loop { match self.token_handler.peek_kind() { Underscore => { self.token_handler.next(); continue; }, DigitGroup(ref s) => { self.token_handler.next(); ds.push_str(s)}, _ => break, } } Ok(ds) } #[recursive_descent_method] fn import_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Import)); let mut path_components = vec![]; path_components.push(self.identifier()?); loop { match (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1)) { (Colon, Colon) => { 
self.token_handler.next(); self.token_handler.next(); if let Identifier(_) = self.token_handler.peek_kind() { path_components.push(self.identifier()?); } else { break; } }, _ => break, } } let imported_names = match self.token_handler.peek_kind() { LCurlyBrace => { let names = delimited!(self, LCurlyBrace, identifier, Comma, RCurlyBrace); ImportedNames::List(names) }, Operator(ref s) if **s == "*" => { self.token_handler.next(); ImportedNames::All }, _ => ImportedNames::LastOfPath }; Ok(ImportSpecifier { id: self.id_store.fresh(), path_components, imported_names }) } #[recursive_descent_method] fn import_suffix(&mut self) -> ParseResult { Ok(match self.token_handler.peek_kind() { Operator(ref s) if **s == "*" => { self.token_handler.next(); ImportedNames::All }, LCurlyBrace => { let names = delimited!(self, LCurlyBrace, identifier, Comma, RCurlyBrace); ImportedNames::List(names) }, _ => return ParseError::new_with_token("Expected '{{' or '*'", self.token_handler.peek()), }) } } fn parse_binary(digits: String, tok: Token) -> ParseResult { let mut result: u64 = 0; let mut multiplier = 1; for d in digits.chars().rev() { match d { '1' => result += multiplier, '0' => (), _ => return ParseError::new_with_token("Encountered a character not '1' or '0 while parsing a binary literal", tok), } multiplier = match multiplier.checked_mul(2) { Some(m) => m, None => return ParseError::new_with_token("This binary expression will overflow", tok) } } Ok(result) } fn parse_hex(digits: String, tok: Token) -> ParseResult { let mut result: u64 = 0; let mut multiplier: u64 = 1; for d in digits.chars().rev() { match d.to_digit(16) { Some(n) => result += n as u64 * multiplier, None => return ParseError::new_with_token("Encountered a non-hex digit in a hex literal", tok), } multiplier = match multiplier.checked_mul(16) { Some(m) => m, None => return ParseError::new_with_token("This hex expression will overflow", tok) } } Ok(result) }