2017-09-06 05:09:20 -07:00
extern crate itertools ;
2017-09-07 19:38:22 -07:00
use std ::collections ::HashMap ;
2017-08-29 05:08:09 -07:00
use std ::rc ::Rc ;
2017-09-06 05:09:20 -07:00
use std ::iter ::{ Enumerate , Peekable } ;
use self ::itertools ::Itertools ;
2017-09-09 01:25:11 -07:00
use std ::vec ::IntoIter ;
2017-09-06 05:09:20 -07:00
use std ::str ::Chars ;
2017-08-29 04:27:07 -07:00
2017-09-09 01:25:11 -07:00
#[ derive(Debug, PartialEq, Clone) ]
2017-09-04 12:17:20 -07:00
pub enum TokenType {
2017-09-06 05:09:20 -07:00
Newline , Semicolon ,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
LParen , RParen ,
LSquareBracket , RSquareBracket ,
LAngleBracket , RAngleBracket ,
LCurlyBrace , RCurlyBrace ,
2017-09-07 22:29:23 -07:00
Pipe ,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
Comma , Period , Colon , Underscore ,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
Operator ( Rc < String > ) ,
DigitGroup ( Rc < String > ) , HexNumberSigil , BinNumberSigil ,
2017-08-29 05:08:09 -07:00
StrLiteral ( Rc < String > ) ,
Identifier ( Rc < String > ) ,
2017-09-04 12:17:20 -07:00
Keyword ( Kw ) ,
2017-09-06 05:09:20 -07:00
2017-09-11 02:07:17 -07:00
EOF ,
2017-09-06 05:09:20 -07:00
Error ( String ) ,
2017-08-29 05:08:09 -07:00
}
2017-09-11 03:13:19 -07:00
use self ::TokenType ::* ;
2017-08-29 05:08:09 -07:00
2017-09-08 02:43:03 -07:00
/// Reserved words of the language.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kw {
    // Control flow.
    If,
    Else,
    Func,
    For,
    Match,

    // Bindings.
    Var,
    Const,
    Let,
    In,

    // Type-level keywords; `SelfType` and `SelfIdent` are distinct variants.
    Alias,
    Type,
    SelfType,
    SelfIdent,
    Trait,
    Impl,

    // Boolean literals.
    True,
    False,
}
2017-09-11 03:13:19 -07:00
use self ::Kw ::* ;
2017-09-04 12:17:20 -07:00
2017-09-07 19:38:22 -07:00
// Lookup table from keyword spelling to its `Kw` variant.
//
// The keys must be the exact source spelling with no surrounding whitespace:
// the tokenizer looks up the identifier text it collected, which never
// contains spaces, so a padded key could never match.
lazy_static! {
    static ref KEYWORDS: HashMap<&'static str, Kw> = hashmap! {
        "if" => Kw::If,
        "else" => Kw::Else,
        "fn" => Kw::Func,
        "for" => Kw::For,
        "match" => Kw::Match,
        "var" => Kw::Var,
        "const" => Kw::Const,
        "let" => Kw::Let,
        "in" => Kw::In,
        "alias" => Kw::Alias,
        "type" => Kw::Type,
        "Self" => Kw::SelfType,
        "self" => Kw::SelfIdent,
        "trait" => Kw::Trait,
        "impl" => Kw::Impl,
        "true" => Kw::True,
        "false" => Kw::False,
    };
}
2017-09-04 12:17:20 -07:00
#[ derive(Debug) ]
pub struct Token {
token_type : TokenType ,
2017-09-06 05:09:20 -07:00
offset : usize ,
2017-09-04 12:17:20 -07:00
}
2017-09-06 23:52:25 -07:00
impl Token {
pub fn get_error ( & self ) -> Option < & String > {
match self . token_type {
TokenType ::Error ( ref s ) = > Some ( s ) ,
_ = > None ,
}
}
}
2017-09-06 05:09:20 -07:00
/// Returns `true` when `c` is a base-10 digit.
fn is_digit(c: &char) -> bool {
    c.to_digit(10).is_some()
}
/// Peekable stream of `(index, char)` pairs over the source text; this is the
/// cursor type shared by `tokenize` and its `handle_*` helpers.
type CharIter<'a> = Peekable<Enumerate<Chars<'a>>>;
2017-09-06 23:52:25 -07:00
pub fn tokenize ( input : & str ) -> Vec < Token > {
2017-09-06 05:09:20 -07:00
let mut tokens : Vec < Token > = Vec ::new ( ) ;
let mut input : CharIter = input . chars ( ) . enumerate ( ) . peekable ( ) ;
while let Some ( ( idx , c ) ) = input . next ( ) {
let cur_tok_type = match c {
'#' = > {
if let Some ( & ( _ , '{' ) ) = input . peek ( ) {
} else {
while let Some ( ( _ , c ) ) = input . next ( ) {
if c = = '\n' {
break ;
}
}
}
continue ;
} ,
2017-09-07 22:29:23 -07:00
c if c . is_whitespace ( ) & & c ! = '\n' = > continue ,
2017-09-06 05:09:20 -07:00
'\n' = > Newline , ';' = > Semicolon ,
2017-09-06 09:42:29 -07:00
':' = > Colon , ',' = > Comma , '.' = > Period ,
2017-09-06 05:09:20 -07:00
'(' = > LParen , ')' = > RParen ,
'{' = > LCurlyBrace , '}' = > RCurlyBrace ,
'<' = > LAngleBracket , '>' = > RAngleBracket ,
'[' = > LSquareBracket , ']' = > RSquareBracket ,
2017-09-07 22:29:23 -07:00
'|' = > Pipe ,
2017-09-06 09:42:29 -07:00
'"' = > handle_quote ( & mut input ) ,
2017-09-06 05:09:20 -07:00
c if is_digit ( & c ) = > handle_digit ( c , & mut input ) ,
2017-09-08 01:33:27 -07:00
c if c . is_alphabetic ( ) | | c = = '_' = > handle_alphabetic ( c , & mut input ) , //TODO I'll probably have to rewrite this if I care about types being uppercase, also type parameterization
2017-09-06 09:42:29 -07:00
c = > handle_operator ( c , & mut input ) ,
2017-09-06 05:09:20 -07:00
} ;
tokens . push ( Token { token_type : cur_tok_type , offset : idx } ) ;
}
2017-09-06 23:52:25 -07:00
tokens
2017-09-06 05:09:20 -07:00
}
fn handle_digit ( c : char , input : & mut CharIter ) -> TokenType {
if c = = '0' & & input . peek ( ) . map_or ( false , | & ( _ , c ) | { c = = 'x' } ) {
input . next ( ) ;
HexNumberSigil
} else if c = = '0' & & input . peek ( ) . map_or ( false , | & ( _ , c ) | { c = = 'b' } ) {
input . next ( ) ;
BinNumberSigil
} else {
let mut buf = c . to_string ( ) ;
buf . extend ( input . peeking_take_while ( | & ( _ , ref c ) | is_digit ( c ) ) . map ( | ( _ , c ) | { c } ) ) ;
DigitGroup ( Rc ::new ( buf ) )
}
2017-08-29 05:08:09 -07:00
}
2017-09-06 09:42:29 -07:00
fn handle_quote ( input : & mut CharIter ) -> TokenType {
2017-09-06 16:52:49 -07:00
let mut buf = String ::new ( ) ;
2017-09-07 00:18:36 -07:00
loop {
match input . next ( ) . map ( | ( _ , c ) | { c } ) {
Some ( '"' ) = > break ,
Some ( '\\' ) = > {
let next = input . peek ( ) . map ( | & ( _ , c ) | { c } ) ;
if next = = Some ( 'n' ) {
input . next ( ) ;
buf . push ( '\n' )
} else if next = = Some ( '"' ) {
input . next ( ) ;
buf . push ( '"' ) ;
} else if next = = Some ( 't' ) {
input . next ( ) ;
buf . push ( '\t' ) ;
}
} ,
Some ( c ) = > buf . push ( c ) ,
None = > return TokenType ::Error ( format! ( " Unclosed string " ) ) ,
2017-09-06 16:52:49 -07:00
}
}
TokenType ::StrLiteral ( Rc ::new ( buf ) )
2017-09-06 09:42:29 -07:00
}
fn handle_alphabetic ( c : char , input : & mut CharIter ) -> TokenType {
2017-09-07 19:38:22 -07:00
let mut buf = String ::new ( ) ;
buf . push ( c ) ;
2017-09-08 01:33:27 -07:00
if c = = '_' & & input . peek ( ) . map ( | & ( _ , c ) | { ! c . is_alphabetic ( ) } ) . unwrap_or ( true ) {
2017-09-11 02:07:17 -07:00
return TokenType ::Underscore
2017-09-08 01:33:27 -07:00
}
2017-09-07 19:38:22 -07:00
loop {
match input . peek ( ) . map ( | & ( _ , c ) | { c } ) {
Some ( c ) if c . is_alphanumeric ( ) = > {
input . next ( ) ;
buf . push ( c ) ;
} ,
_ = > break ,
}
}
match KEYWORDS . get ( buf . as_str ( ) ) {
Some ( kw ) = > TokenType ::Keyword ( kw . clone ( ) ) ,
None = > TokenType ::Identifier ( Rc ::new ( buf ) ) ,
}
2017-09-06 09:42:29 -07:00
}
fn handle_operator ( c : char , input : & mut CharIter ) -> TokenType {
2017-09-07 22:29:23 -07:00
let mut buf = String ::new ( ) ;
buf . push ( c ) ;
loop {
match input . peek ( ) . map ( | & ( _ , c ) | { c } ) {
Some ( c ) if ! c . is_alphabetic ( ) & & ! c . is_whitespace ( ) = > {
input . next ( ) ;
buf . push ( c ) ;
} ,
_ = > break
}
}
TokenType ::Operator ( Rc ::new ( buf ) )
2017-09-06 09:42:29 -07:00
}
2017-09-08 02:43:03 -07:00
#[cfg(test)]
mod schala_tokenizer_tests {
    use super::*;
    use super::TokenType::*;
    use super::Kw::*;

    // Shorthand constructors for the expected tokens. The literal passed in
    // must be the exact token text, with no surrounding whitespace.
    macro_rules! digit { ($ident:expr) => { DigitGroup(Rc::new($ident.to_string())) } }
    macro_rules! ident { ($ident:expr) => { Identifier(Rc::new($ident.to_string())) } }
    macro_rules! op { ($ident:expr) => { Operator(Rc::new($ident.to_string())) } }

    /// Keywords, identifiers, punctuation and operators in one line.
    #[test]
    fn tokens() {
        let a = tokenize("let a: A<B> = c ++ d");
        let token_types: Vec<TokenType> = a.into_iter().map(move |t| t.token_type).collect();
        assert_eq!(token_types, vec![Keyword(Let), ident!("a"), Colon, ident!("A"),
            LAngleBracket, ident!("B"), RAngleBracket, op!("="), ident!("c"), op!("++"), ident!("d")]);
    }

    /// An underscore between digit groups is its own token.
    #[test]
    fn underscores() {
        let token_types: Vec<TokenType> = tokenize("4_8").into_iter().map(move |t| t.token_type).collect();
        assert_eq!(token_types, vec![digit!("4"), Underscore, digit!("8")]);
    }
}
2017-09-06 09:42:29 -07:00
2017-08-29 05:08:09 -07:00
/*
2017-09-11 02:07:17 -07:00
Schala ( PROVISIONAL ! ! ) EBNF grammar
2017-08-30 04:28:52 -07:00
' ' = literal , all other symbols are nonterminals
2017-08-29 05:08:09 -07:00
program := ( statement delimiter ? ) *
2017-08-30 04:28:52 -07:00
delimiter := ' Newline ' | ';'
2017-08-29 05:08:09 -07:00
statement := declaration | expression
2017-08-30 04:28:52 -07:00
declaration := module | function | type_decl
type_decl := ' type ' type_format
type_format := ' alias ' ' = ' type | type_constructor
type_constructor := capital_ident '=' type_rhs
type_rhs := struct_decl | type_variant ( '|' type_variant ) *
struct_decl := ' struct ' ' { ' ( ident ':' type ) * '}'
type_variant := capital_ident | tuple_type | capital_ident struct_decl
tuple_type := // something like Variant(a,b)
type := // something like Type[A[b]]
ascription := expression ( ':' type ) +
function := ' fn ' prototype '{' ( statement ) * '}'
prototype := identifier '(' identlist ')'
identlist := identifier ( ',' identifier ) * | ε
2017-08-29 05:08:09 -07:00
declaration := FN prototype LCurlyBrace ( statement ) * RCurlyBrace
prototype := identifier LParen identlist RParen
identlist := Ident ( Comma Ident ) * | ε
exprlist := Expression ( Comma Expression ) * | ε
itemlist := Ident COLON Expression ( Comma Ident COLON Expression ) * | ε
expression := postop_expression ( op postop_expression ) *
postop_expression := primary_expression postop
primary_expression := number_expr | String | identifier_expr | paren_expr | conditional_expr | while_expr | lambda_expr | list_expr | struct_expr
number_expr := ( PLUS | MINUS ) number_expr | Number
identifier_expr := call_expression | Variable
list_expr := LSquareBracket exprlist RSquareBracket
struct_expr := LCurlyBrace itemlist RCurlyBrace
call_expression := Identifier LParen exprlist RParen
while_expr := WHILE primary_expression LCurlyBrace ( expression delimiter ) * RCurlyBrace
paren_expr := LParen expression RParen
conditional_expr := IF expression LCurlyBrace ( expression delimiter ) * RCurlyBrace ( LCurlyBrace ( expression delimiter ) * RCurlyBrace ) ?
lambda_expr := FN LParen identlist RParen LCurlyBrace ( expression delimiter ) * RCurlyBrace
lambda_call := | LParen exprlist RParen
postop := ε | LParen exprlist RParen | LBracket expression RBracket
op := '+' , '-' , etc .
*/
2017-09-11 02:07:17 -07:00
/* Schala EBNF Grammar */
2017-09-13 22:40:05 -07:00
/* Terminal productions are «in Guillemets» or UPPERCASE if they are a class
* or not representable in ASCII
2017-09-11 02:07:17 -07:00
program := ( statement delimiter ) * EOF
2017-09-13 22:40:05 -07:00
delimiter := NEWLINE | « ; »
2017-09-11 02:07:17 -07:00
statement := expression | declaration
2017-09-13 22:40:05 -07:00
declaration := type_alias | type_declaration | func_declaration
2017-09-11 02:07:17 -07:00
2017-09-13 22:40:05 -07:00
type_alias := « alias » IDENTIFIER « = » IDENTIFIER
type_declaration := « type » IDENTIFIER « = » type_body
type_body := variant_specifier ( « | » variant_specifier ) *
variant_specifier := « { » member_list « } »
member_list := ( IDENTIFIER type_anno ) *
2017-09-11 20:37:19 -07:00
2017-09-13 22:40:05 -07:00
func_declaration := « fn » IDENTIFIER « ( » param_list « ) »
param_list := ( IDENTIFIER type_anno + « , » ) *
2017-09-11 02:07:17 -07:00
2017-09-13 22:40:05 -07:00
type_anno := « :» type
2017-09-11 23:16:37 -07:00
2017-09-12 02:30:27 -07:00
expression := precedence_expr
precedence_expr := primary
2017-09-13 20:10:06 -07:00
primary := literal | paren_expr | identifier_expr
2017-09-13 22:40:05 -07:00
2017-09-13 20:10:06 -07:00
paren_expr := LParen expression RParen
identifier_expr := call_expr | index_expr | IDENTIFIER
2017-09-13 22:40:05 -07:00
literal := « true » | « false » | number_literal | str_literal
2017-09-11 02:07:17 -07:00
2017-09-13 22:40:05 -07:00
call_expr := IDENTIFIER « ( » expr_list « ) » //TODO maybe make this optional? or no, have a bare identifier meant to be used as method taken care of in eval
index_expr := « [ » ( expression ( « , » expression ) * | ε ) « ] »
expr_list := expression ( « , » expression ) * | ε
2017-09-11 15:42:49 -07:00
2017-09-11 02:07:17 -07:00
// a float_literal can still be assigned to an int in type-checking
number_literal := int_literal | float_literal
2017-09-13 22:40:05 -07:00
int_literal := ( « 0x » | « 0b » ) digits
float_literal := digits ( '.' digits )
digits := ( DIGIT_GROUP underscore ) +
2017-09-11 02:07:17 -07:00
*/
2017-09-09 01:25:11 -07:00
/// Peekable, owning iterator over the token list; the parser's input cursor.
type TokenIter = Peekable<IntoIter<Token>>;
2017-09-11 03:21:07 -07:00
/// Error reported when parsing fails.
#[derive(Debug)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub msg: String,
}

impl ParseError {
    /// Convenience constructor: builds the error from `msg` and returns it
    /// already wrapped in `Err`, so it can be used directly as the result of
    /// any parsing function.
    fn new<T>(msg: &str) -> ParseResult<T> {
        let error = ParseError { msg: String::from(msg) };
        Err(error)
    }
}

/// Result type used by every parsing function.
pub type ParseResult<T> = Result<T, ParseError>;
2017-09-08 16:42:42 -07:00
struct Parser {
2017-09-09 01:25:11 -07:00
tokens : TokenIter ,
}
impl Parser {
fn new ( input : Vec < Token > ) -> Parser {
Parser { tokens : input . into_iter ( ) . peekable ( ) }
}
2017-09-11 02:07:17 -07:00
fn peek ( & mut self ) -> TokenType {
self . tokens . peek ( ) . map ( | ref t | { t . token_type . clone ( ) } ) . unwrap_or ( TokenType ::EOF )
2017-09-09 01:27:15 -07:00
}
2017-09-11 02:07:17 -07:00
fn next ( & mut self ) -> TokenType {
self . tokens . next ( ) . map ( | ref t | { t . token_type . clone ( ) } ) . unwrap_or ( TokenType ::EOF )
2017-09-09 01:25:11 -07:00
}
2017-09-08 16:42:42 -07:00
}
2017-09-11 02:07:17 -07:00
/// Consumes the next token if it matches the given pattern; otherwise returns
/// early from the *enclosing function* with a `ParseError` carrying the given
/// message.
///
/// Two forms are accepted:
/// * `expect!(parser, PATTERN, "message")`
/// * `expect!(parser, PATTERN if GUARD, "message")`
///
/// On success the macro evaluates to the consumed token's `TokenType`.
macro_rules! expect {
    ($parser:expr, $expected:pat, $msg:expr) => {
        match $parser.peek() {
            $expected => $parser.next(),
            _ => return Err(ParseError { msg: $msg.to_string() }),
        }
    };
    ($parser:expr, $expected:pat if $guard:expr, $msg:expr) => {
        match $parser.peek() {
            $expected if $guard => $parser.next(),
            _ => return Err(ParseError { msg: $msg.to_string() }),
        }
    };
}
2017-09-11 03:21:07 -07:00
#[ derive(Debug, PartialEq) ]
2017-09-08 16:42:42 -07:00
pub struct AST ( Vec < Statement > ) ;
/// A top-level statement: either an expression or a declaration.
#[derive(Debug, PartialEq)]
pub enum Statement {
    Expression(Expression),
    Declaration(Declaration),
}
#[ derive(Debug, PartialEq) ]
pub enum Declaration {
FuncDecl ,
2017-09-13 22:40:05 -07:00
TypeDecl ( Rc < String > , TypeBody ) ,
TypeAlias ( Rc < String > , Rc < String > )
2017-09-11 15:42:49 -07:00
}
#[ derive(Debug, PartialEq) ]
2017-09-13 22:40:05 -07:00
pub struct TypeBody ( Vec < Variant > ) ;
/// One variant of a declared type.
#[derive(Debug, PartialEq)]
pub enum Variant {
    /// A variant consisting of just a name.
    Singleton(Rc<String>),
    // Placeholders for variant kinds that are not implemented yet:
    //ArgumentConstructor,
    //Record
}
#[ derive(Debug, PartialEq) ]
pub enum Expression {
2017-09-11 03:10:10 -07:00
IntLiteral ( u64 ) ,
2017-09-08 16:42:42 -07:00
FloatLiteral ( f64 ) ,
2017-09-13 20:10:06 -07:00
BinExp ( Operation , Box < Expression > , Box < Expression > ) ,
Variable ( Rc < String > ) ,
Call {
name : Rc < String > ,
params : Vec < Expression > ,
}
2017-09-12 02:30:27 -07:00
}
/// A binary operator, identified by its source text.
#[derive(Debug, PartialEq)]
pub struct Operation {
    op: Rc<String>,
}

impl Operation {
    /// Precedence lower than that of any operator; the starting point for
    /// parsing a full expression.
    fn min_precedence() -> i32 {
        i32::min_value()
    }

    /// Binding strength of `op`, decided by its first character: `+`/`-` bind
    /// loosest (10), `*`, `/` and `%` tighter (20), everything else tightest
    /// (30).
    ///
    /// Panics if `op` is empty.
    fn get_precedence(op: Rc<String>) -> i32 {
        let first: char = op.chars().next().unwrap();
        if first == '+' || first == '-' {
            10
        } else if first == '*' || first == '/' || first == '%' {
            20
        } else {
            30
        }
    }
}
2017-09-11 02:07:17 -07:00
impl Parser {
fn program ( & mut self ) -> ParseResult < AST > {
let mut statements = Vec ::new ( ) ;
loop {
match self . peek ( ) {
EOF = > break ,
Newline | Semicolon = > {
self . next ( ) ;
continue ;
} ,
_ = > statements . push ( self . statement ( ) ? ) ,
}
}
Ok ( AST ( statements ) )
}
fn statement ( & mut self ) -> ParseResult < Statement > {
//TODO handle error recovery here
match self . peek ( ) {
2017-09-13 22:40:05 -07:00
Keyword ( Alias ) = > self . type_alias ( ) . map ( | alias | { Statement ::Declaration ( alias ) } ) ,
2017-09-11 02:07:17 -07:00
Keyword ( Type ) = > self . type_declaration ( ) . map ( | decl | { Statement ::Declaration ( decl ) } ) ,
Keyword ( Func ) = > self . func_declaration ( ) . map ( | func | { Statement ::Declaration ( func ) } ) ,
_ = > self . expression ( ) . map ( | expr | { Statement ::Expression ( expr ) } ) ,
}
}
2017-09-13 22:40:05 -07:00
fn type_alias ( & mut self ) -> ParseResult < Declaration > {
expect! ( self , Keyword ( Alias ) , " Expected 'alias' " ) ;
let alias = self . identifier ( ) ? ;
expect! ( self , Operator ( ref c ) if * * c = = " = " , " Expected '=' " ) ;
let original = self . identifier ( ) ? ;
Ok ( Declaration ::TypeAlias ( alias , original ) )
}
2017-09-11 02:07:17 -07:00
fn type_declaration ( & mut self ) -> ParseResult < Declaration > {
2017-09-11 15:42:49 -07:00
expect! ( self , Keyword ( Type ) , " Expected 'type' " ) ;
let name = self . identifier ( ) ? ;
2017-09-13 22:40:05 -07:00
expect! ( self , Operator ( ref c ) if * * c = = " = " , " Expected '=' " ) ;
let body = self . type_body ( ) ? ;
Ok ( Declaration ::TypeDecl ( name , body ) )
}
fn type_body ( & mut self ) -> ParseResult < TypeBody > {
let variant = Variant ::Singleton ( self . identifier ( ) ? ) ;
Ok ( TypeBody ( vec! ( variant ) ) )
2017-09-11 02:07:17 -07:00
}
fn func_declaration ( & mut self ) -> ParseResult < Declaration > {
2017-09-11 20:37:19 -07:00
expect! ( self , Keyword ( Func ) , " Expected 'fn' " ) ;
let name = self . identifier ( ) ? ;
expect! ( self , LParen , " Expected '(' " ) ;
let params = self . param_list ( ) ;
expect! ( self , RParen , " Expected ')' " ) ;
Ok ( Declaration ::FuncDecl )
}
fn param_list ( & mut self ) -> ParseResult < Vec < Rc < String > > > {
Ok ( vec! ( ) )
2017-09-11 02:07:17 -07:00
}
fn expression ( & mut self ) -> ParseResult < Expression > {
2017-09-12 05:16:37 -07:00
self . precedence_expr ( Operation ::min_precedence ( ) )
}
// this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
fn precedence_expr ( & mut self , precedence : i32 ) -> ParseResult < Expression > {
use self ::Expression ::* ;
//TODO clean this up
let mut lhs = self . primary ( ) ? ;
loop {
let op_str = match self . peek ( ) {
Operator ( op ) = > op ,
_ = > break ,
} ;
let new_precedence = Operation ::get_precedence ( op_str ) ;
if precedence > = new_precedence {
break ;
}
let op_str = match self . next ( ) {
Operator ( op ) = > op ,
_ = > unreachable! ( ) ,
} ;
let rhs = self . precedence_expr ( new_precedence ) ? ;
let operation = Operation { op : op_str } ;
lhs = BinExp ( operation , Box ::new ( lhs ) , Box ::new ( rhs ) ) ;
}
2017-09-12 02:30:27 -07:00
Ok ( lhs )
2017-09-11 02:07:17 -07:00
}
fn primary ( & mut self ) -> ParseResult < Expression > {
2017-09-13 03:46:16 -07:00
match self . peek ( ) {
LParen = > self . paren_expr ( ) ,
2017-09-13 20:10:06 -07:00
Identifier ( _ ) = > self . identifier_expr ( ) ,
2017-09-13 03:46:16 -07:00
_ = > self . literal ( ) ,
}
}
fn paren_expr ( & mut self ) -> ParseResult < Expression > {
expect! ( self , LParen , " Expected '(' " ) ;
let expr = self . expression ( ) ? ;
expect! ( self , RParen , " Expected ')' " ) ;
Ok ( expr )
2017-09-11 02:07:17 -07:00
}
2017-09-11 15:42:49 -07:00
2017-09-13 20:10:06 -07:00
fn identifier_expr ( & mut self ) -> ParseResult < Expression > {
let identifier = self . identifier ( ) ? ;
match self . peek ( ) {
LParen = > {
let call = self . call_expr ( ) ? ;
unimplemented! ( )
} ,
LSquareBracket = > {
let bracket = self . index_expr ( ) ? ;
unimplemented! ( )
} ,
_ = > Ok ( Expression ::Variable ( identifier ) )
}
}
fn call_expr ( & mut self ) -> ParseResult < Expression > {
unimplemented! ( )
}
fn index_expr ( & mut self ) -> ParseResult < Expression > {
unimplemented! ( )
}
2017-09-11 15:42:49 -07:00
fn identifier ( & mut self ) -> ParseResult < Rc < String > > {
match self . next ( ) {
Identifier ( s ) = > Ok ( s ) ,
p = > ParseError ::new ( & format! ( " Expected an identifier, got {:?} " , p ) ) ,
}
}
2017-09-11 02:07:17 -07:00
fn literal ( & mut self ) -> ParseResult < Expression > {
match self . peek ( ) {
DigitGroup ( _ ) | HexNumberSigil | BinNumberSigil | Period = > self . number_literal ( ) ,
2017-09-12 05:16:37 -07:00
t = > panic! ( " trying to parse {:?} " , t ) ,
2017-09-11 02:07:17 -07:00
}
}
fn number_literal ( & mut self ) -> ParseResult < Expression > {
match self . peek ( ) {
HexNumberSigil | BinNumberSigil = > self . int_literal ( ) ,
_ = > self . float_literal ( ) ,
}
}
fn int_literal ( & mut self ) -> ParseResult < Expression > {
use self ::Expression ::* ;
match self . next ( ) {
BinNumberSigil = > {
2017-09-11 23:27:15 -07:00
let digits = self . digits ( ) ? ;
let n = parse_binary ( digits ) ? ;
Ok ( IntLiteral ( n ) )
2017-09-11 02:07:17 -07:00
} ,
HexNumberSigil = > {
unimplemented! ( )
} ,
_ = > return ParseError ::new ( " Expected '0x' or '0b' " ) ,
}
}
fn float_literal ( & mut self ) -> ParseResult < Expression > {
use self ::Expression ::* ;
2017-09-11 02:38:27 -07:00
let mut digits = self . digits ( ) ? ;
if let TokenType ::Period = self . peek ( ) {
self . next ( ) ;
digits . push_str ( " . " ) ;
digits . push_str ( & self . digits ( ) ? ) ;
match digits . parse ::< f64 > ( ) {
Ok ( f ) = > Ok ( FloatLiteral ( f ) ) ,
Err ( e ) = > unimplemented! ( " Float didn't parse with error: {} " , e ) ,
}
} else {
match digits . parse ::< u64 > ( ) {
2017-09-11 03:10:10 -07:00
Ok ( d ) = > Ok ( IntLiteral ( d ) ) ,
2017-09-11 02:38:27 -07:00
Err ( e ) = > unimplemented! ( " Need to handle numbers that don't parse to a Rust u64 {} " , e ) ,
}
2017-09-11 02:07:17 -07:00
}
}
fn digits ( & mut self ) -> ParseResult < String > {
let mut ds = String ::new ( ) ;
loop {
2017-09-11 02:38:27 -07:00
match self . peek ( ) {
Underscore = > { self . next ( ) ; continue ; } ,
DigitGroup ( ref s ) = > { self . next ( ) ; ds . push_str ( s ) } ,
2017-09-11 02:07:48 -07:00
_ = > break ,
2017-09-11 02:07:17 -07:00
}
}
Ok ( ds )
}
}
2017-09-09 00:31:15 -07:00
2017-09-11 23:27:15 -07:00
fn parse_binary ( digits : String ) -> ParseResult < u64 > {
let mut result : u64 = 0 ;
let mut multiplier = 1 ;
for d in digits . chars ( ) . rev ( ) {
match d {
'1' = > result + = multiplier ,
'0' = > ( ) ,
_ = > return ParseError ::new ( " Encountered a character not '1' or '0 while parsing a binary literal " ) ,
}
multiplier * = 2 ;
}
Ok ( result )
}
2017-09-09 00:31:15 -07:00
/// Parses a token list into an `AST`, or returns the first `ParseError`
/// encountered.
pub fn parse(input: Vec<Token>) -> Result<AST, ParseError> {
    Parser::new(input).program()
}
2017-09-11 03:21:07 -07:00
#[cfg(test)]
mod parse_tests {
    use super::*;
    use super::Statement::*;
    use super::Declaration::*;
    use super::Expression::*;
    use super::ParseError;

    // Helpers for building expected trees. String arguments must be the exact
    // token text, with no surrounding whitespace, or they will never compare
    // equal to what the parser produces.

    /// `rc!(Name)` builds an `Rc<String>` from a bare token.
    macro_rules! rc {
        ($string:tt) => { Rc::new(stringify!($string).to_string()) }
    }
    /// Tokenizes and parses `$string`, asserting the result equals `$correct`.
    macro_rules! parse_test {
        ($string:expr, $correct:expr) => { assert_eq!(parse(tokenize($string)).unwrap(), $correct) }
    }
    macro_rules! binexp {
        ($op:expr, $lhs:expr, $rhs:expr) => { BinExp($op, Box::new($lhs), Box::new($rhs)) }
    }
    macro_rules! op {
        ($op:expr) => { Operation { op: Rc::new($op.to_string()) } }
    }
    macro_rules! var {
        ($var:expr) => { Variable(Rc::new($var.to_string())) }
    }

    #[test]
    fn parsing_number_literals_and_binexps() {
        parse_test!("8.1", AST(vec![Expression(FloatLiteral(8.1))]));
        parse_test!("0b010", AST(vec![Expression(IntLiteral(2))]));
        parse_test!("3; 4; 4.3", AST(
            vec![Expression(IntLiteral(3)), Expression(IntLiteral(4)),
                 Expression(FloatLiteral(4.3))]));

        // `*` binds tighter than `+`, on either side.
        parse_test!("1 + 2 * 3", AST(vec![
            Expression(binexp!(op!("+"), IntLiteral(1), binexp!(op!("*"), IntLiteral(2), IntLiteral(3))))
        ]));
        parse_test!("1 * 2 + 3", AST(vec![
            Expression(binexp!(op!("+"), binexp!(op!("*"), IntLiteral(1), IntLiteral(2)), IntLiteral(3)))
        ]));

        parse_test!("1 && 2", AST(vec![Expression(binexp!(op!("&&"), IntLiteral(1), IntLiteral(2)))]));

        // Equal-precedence operators associate to the left.
        parse_test!("1 + 2 * 3 + 4", AST(vec![Expression(
            binexp!(op!("+"),
                binexp!(op!("+"), IntLiteral(1),
                    binexp!(op!("*"), IntLiteral(2), IntLiteral(3))
                ),
                IntLiteral(4)
            )
        )]));

        // Parentheses override precedence.
        parse_test!("(1 + 2) * 3", AST(vec![
            Expression(binexp!(op!("*"), binexp!(op!("+"), IntLiteral(1), IntLiteral(2)), IntLiteral(3)))
        ]));
    }

    #[test]
    fn parsing_identifiers() {
        parse_test!("a", AST(vec![Expression(var!("a"))]));
        parse_test!("a + b", AST(vec![Expression(binexp!(op!("+"), var!("a"), var!("b")))]));
    }

    #[test]
    fn parsing_types() {
        parse_test!("type Yolo = Yolo", AST(vec![Declaration(TypeDecl(rc!(Yolo), TypeBody(vec![Variant::Singleton(rc!(Yolo))])))]));
        parse_test!("alias Sex = Drugs", AST(vec![Declaration(TypeAlias(rc!(Sex), rc!(Drugs)))]));
    }
}