2017-09-06 05:09:20 -07:00
extern crate itertools ;
2017-08-29 04:27:07 -07:00
use language ::{ TokenError , ParseError } ;
2017-09-07 19:38:22 -07:00
use std ::collections ::HashMap ;
2017-08-29 05:08:09 -07:00
use std ::rc ::Rc ;
2017-09-06 05:09:20 -07:00
use std ::iter ::{ Enumerate , Peekable } ;
use self ::itertools ::Itertools ;
use std ::str ::Chars ;
2017-08-29 04:27:07 -07:00
2017-08-31 16:26:55 -07:00
#[ allow(dead_code) ]
2017-08-29 04:27:07 -07:00
#[ derive(Debug) ]
2017-09-04 12:17:20 -07:00
pub enum TokenType {
2017-09-06 05:09:20 -07:00
Newline , Semicolon ,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
LParen , RParen ,
LSquareBracket , RSquareBracket ,
LAngleBracket , RAngleBracket ,
LCurlyBrace , RCurlyBrace ,
2017-09-07 22:29:23 -07:00
Pipe ,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
Comma , Period , Colon , Underscore ,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
Operator ( Rc < String > ) ,
DigitGroup ( Rc < String > ) , HexNumberSigil , BinNumberSigil ,
2017-08-29 05:08:09 -07:00
StrLiteral ( Rc < String > ) ,
Identifier ( Rc < String > ) ,
2017-09-04 12:17:20 -07:00
Keyword ( Kw ) ,
2017-09-06 05:09:20 -07:00
Error ( String ) ,
2017-08-29 05:08:09 -07:00
}
2017-09-07 19:38:22 -07:00
#[ derive(Debug, Clone, Copy) ]
2017-09-04 12:17:20 -07:00
pub enum Kw {
2017-09-07 22:29:23 -07:00
If , Else ,
2017-09-04 12:17:20 -07:00
Func ,
2017-09-06 05:09:20 -07:00
For ,
2017-09-07 22:29:23 -07:00
Var , Const , Let ,
Type , SelfType , SelfIdent ,
Trait , Impl ,
True , False
2017-09-04 12:17:20 -07:00
}
2017-09-07 19:38:22 -07:00
lazy_static! {
static ref KEYWORDS : HashMap < & 'static str , Kw > =
hashmap! {
" if " = > Kw ::If ,
" else " = > Kw ::Else ,
2017-09-07 22:29:23 -07:00
" fn " = > Kw ::Func ,
" for " = > Kw ::For ,
" var " = > Kw ::Var ,
" const " = > Kw ::Const ,
" let " = > Kw ::Let ,
" type " = > Kw ::Type ,
" Self " = > Kw ::SelfType ,
" self " = > Kw ::SelfIdent ,
" trait " = > Kw ::Trait ,
" impl " = > Kw ::Impl ,
" true " = > Kw ::True ,
" false " = > Kw ::False ,
2017-09-07 19:38:22 -07:00
} ;
}
2017-09-04 12:17:20 -07:00
#[ derive(Debug) ]
pub struct Token {
token_type : TokenType ,
2017-09-06 05:09:20 -07:00
offset : usize ,
2017-09-04 12:17:20 -07:00
}
2017-09-06 23:52:25 -07:00
impl Token {
pub fn get_error ( & self ) -> Option < & String > {
match self . token_type {
TokenType ::Error ( ref s ) = > Some ( s ) ,
_ = > None ,
}
}
}
2017-09-06 05:09:20 -07:00
fn is_digit ( c : & char ) -> bool {
c . is_digit ( 10 )
}
type CharIter < ' a > = Peekable < Enumerate < Chars < ' a > > > ;
2017-09-06 23:52:25 -07:00
pub fn tokenize ( input : & str ) -> Vec < Token > {
2017-09-06 05:09:20 -07:00
use self ::TokenType ::* ;
let mut tokens : Vec < Token > = Vec ::new ( ) ;
let mut input : CharIter = input . chars ( ) . enumerate ( ) . peekable ( ) ;
while let Some ( ( idx , c ) ) = input . next ( ) {
let cur_tok_type = match c {
'#' = > {
if let Some ( & ( _ , '{' ) ) = input . peek ( ) {
} else {
while let Some ( ( _ , c ) ) = input . next ( ) {
if c = = '\n' {
break ;
}
}
}
continue ;
} ,
2017-09-07 22:29:23 -07:00
c if c . is_whitespace ( ) & & c ! = '\n' = > continue ,
2017-09-06 05:09:20 -07:00
'\n' = > Newline , ';' = > Semicolon ,
2017-09-06 09:42:29 -07:00
':' = > Colon , ',' = > Comma , '.' = > Period ,
2017-09-06 05:09:20 -07:00
'(' = > LParen , ')' = > RParen ,
'{' = > LCurlyBrace , '}' = > RCurlyBrace ,
'<' = > LAngleBracket , '>' = > RAngleBracket ,
'[' = > LSquareBracket , ']' = > RSquareBracket ,
2017-09-07 22:29:23 -07:00
'|' = > Pipe ,
2017-09-06 09:42:29 -07:00
'"' = > handle_quote ( & mut input ) ,
2017-09-06 05:09:20 -07:00
c if is_digit ( & c ) = > handle_digit ( c , & mut input ) ,
2017-09-07 19:38:22 -07:00
c @ '_' | c if c . is_alphabetic ( ) = > handle_alphabetic ( c , & mut input ) , //TODO I'll probably have to rewrite this if I care about types being uppercase, also type parameterization
2017-09-06 09:42:29 -07:00
c = > handle_operator ( c , & mut input ) ,
2017-09-06 05:09:20 -07:00
} ;
tokens . push ( Token { token_type : cur_tok_type , offset : idx } ) ;
}
2017-09-06 23:52:25 -07:00
tokens
2017-09-06 05:09:20 -07:00
}
fn handle_digit ( c : char , input : & mut CharIter ) -> TokenType {
use self ::TokenType ::* ;
if c = = '0' & & input . peek ( ) . map_or ( false , | & ( _ , c ) | { c = = 'x' } ) {
input . next ( ) ;
HexNumberSigil
} else if c = = '0' & & input . peek ( ) . map_or ( false , | & ( _ , c ) | { c = = 'b' } ) {
input . next ( ) ;
BinNumberSigil
} else {
let mut buf = c . to_string ( ) ;
buf . extend ( input . peeking_take_while ( | & ( _ , ref c ) | is_digit ( c ) ) . map ( | ( _ , c ) | { c } ) ) ;
DigitGroup ( Rc ::new ( buf ) )
}
2017-08-29 05:08:09 -07:00
}
2017-09-06 09:42:29 -07:00
fn handle_quote ( input : & mut CharIter ) -> TokenType {
2017-09-06 16:52:49 -07:00
let mut buf = String ::new ( ) ;
2017-09-07 00:18:36 -07:00
loop {
match input . next ( ) . map ( | ( _ , c ) | { c } ) {
Some ( '"' ) = > break ,
Some ( '\\' ) = > {
let next = input . peek ( ) . map ( | & ( _ , c ) | { c } ) ;
if next = = Some ( 'n' ) {
input . next ( ) ;
buf . push ( '\n' )
} else if next = = Some ( '"' ) {
input . next ( ) ;
buf . push ( '"' ) ;
} else if next = = Some ( 't' ) {
input . next ( ) ;
buf . push ( '\t' ) ;
}
} ,
Some ( c ) = > buf . push ( c ) ,
None = > return TokenType ::Error ( format! ( " Unclosed string " ) ) ,
2017-09-06 16:52:49 -07:00
}
}
TokenType ::StrLiteral ( Rc ::new ( buf ) )
2017-09-06 09:42:29 -07:00
}
fn handle_alphabetic ( c : char , input : & mut CharIter ) -> TokenType {
2017-09-07 19:38:22 -07:00
let mut buf = String ::new ( ) ;
buf . push ( c ) ;
loop {
match input . peek ( ) . map ( | & ( _ , c ) | { c } ) {
Some ( c ) if c . is_alphanumeric ( ) = > {
input . next ( ) ;
buf . push ( c ) ;
} ,
_ = > break ,
}
}
match KEYWORDS . get ( buf . as_str ( ) ) {
Some ( kw ) = > TokenType ::Keyword ( kw . clone ( ) ) ,
None = > TokenType ::Identifier ( Rc ::new ( buf ) ) ,
}
2017-09-06 09:42:29 -07:00
}
fn handle_operator ( c : char , input : & mut CharIter ) -> TokenType {
2017-09-07 22:29:23 -07:00
let mut buf = String ::new ( ) ;
buf . push ( c ) ;
loop {
match input . peek ( ) . map ( | & ( _ , c ) | { c } ) {
Some ( c ) if ! c . is_alphabetic ( ) & & ! c . is_whitespace ( ) = > {
input . next ( ) ;
buf . push ( c ) ;
} ,
_ = > break
}
}
TokenType ::Operator ( Rc ::new ( buf ) )
2017-09-06 09:42:29 -07:00
}
2017-08-29 05:08:09 -07:00
/*
2017-08-30 04:28:52 -07:00
Schala EBNF grammar
type alias < name > = < other type >
type < name > = struct { < field > : < type > , * }
type < name > = Variant1 | Variant2 ( type , type ) | Variant3 struct { }
' ' = literal , all other symbols are nonterminals
2017-08-29 05:08:09 -07:00
program := ( statement delimiter ? ) *
2017-08-30 04:28:52 -07:00
delimiter := ' Newline ' | ';'
2017-08-29 05:08:09 -07:00
statement := declaration | expression
2017-08-30 04:28:52 -07:00
declaration := module | function | type_decl
type_decl := ' type ' type_format
type_format := ' alias ' ' = ' type | type_constructor
type_constructor := capital_ident '=' type_rhs
type_rhs := struct_decl | type_variant ( '|' type_variant ) *
struct_decl := ' struct ' ' { ' ( ident ':' type ) * '}'
type_variant := capital_ident | tuple_type | capital_ident struct_decl
tuple_type := // something like Variant(a,b)
type := // something like Type[A[b]]
ascription := expression ( ':' type ) +
function := ' fn ' prototype '{' ( statement ) * '}'
prototype := identifier '(' identlist ')'
identlist := identifier ( ',' identifier ) * | ε
2017-08-29 05:08:09 -07:00
declaration := FN prototype LCurlyBrace ( statement ) * RCurlyBrace
prototype := identifier LParen identlist RParen
identlist := Ident ( Comma Ident ) * | ε
exprlist := Expression ( Comma Expression ) * | ε
itemlist := Ident COLON Expression ( Comma Ident COLON Expression ) * | ε
expression := postop_expression ( op postop_expression ) *
postop_expression := primary_expression postop
primary_expression := number_expr | String | identifier_expr | paren_expr | conditional_expr | while_expr | lambda_expr | list_expr | struct_expr
number_expr := ( PLUS | MINUS ) number_expr | Number
identifier_expr := call_expression | Variable
list_expr := LSquareBracket exprlist RSquareBracket
struct_expr := LCurlyBrace itemlist RCurlyBrace
call_expression := Identifier LParen exprlist RParen
while_expr := WHILE primary_expression LCurlyBrace ( expression delimiter ) * RCurlyBrace
paren_expr := LParen expression RParen
conditional_expr := IF expression LCurlyBrace ( expression delimiter ) * RCurlyBrace ( LCurlyBrace ( expresion delimiter ) * RCurlyBrace ) ?
lambda_expr := FN LParen identlist RParen LCurlyBrace ( expression delimiter ) * RCurlyBrace
lambda_call := | LParen exprlist RParen
postop := ε | LParen exprlist RParen | LBracket expression RBracket
op := '+' , '-' , etc .
* /
2017-08-31 16:26:55 -07:00
#[ allow(dead_code) ]
2017-08-29 04:27:07 -07:00
#[ derive(Debug) ]
2017-08-29 05:08:09 -07:00
pub struct AST { }
2017-08-31 16:26:55 -07:00
#[ allow(dead_code) ]
2017-08-31 19:18:56 -07:00
pub fn parse ( _input : Vec < Token > ) -> Result < AST , ParseError > {
2017-08-29 05:08:09 -07:00
Ok ( AST { } )
}