Changing how parsing works again

Someone wrote a port of the LLVM Kaleidoscope tutorial to Rust, namely
https://github.com/jauhien/iron-kaleidoscope

I'm just going to follow along with it.
This commit is contained in:
greg 2016-01-04 03:55:06 -08:00
parent 61c36c4def
commit 09d524c74a
4 changed files with 13 additions and 315 deletions

View File

@ -1,3 +1,4 @@
/*
use parser::AST; use parser::AST;
struct Evaluator { struct Evaluator {
@ -100,3 +101,4 @@ mod tests {
assert_eq!(output, "3"); assert_eq!(output, "3");
} }
} }
*/

View File

@ -11,13 +11,9 @@ use simplerepl::{REPL, ReplState};
use tokenizer::tokenize; use tokenizer::tokenize;
mod tokenizer; mod tokenizer;
use parser::{ParseResult, parse}; use parser::parse;
mod parser; mod parser;
use evaluator::{evaluate};
mod evaluator;
fn main() { fn main() {
let args: Vec<String> = std::env::args().collect(); let args: Vec<String> = std::env::args().collect();
println!("Schala v 0.02"); println!("Schala v 0.02");
@ -60,20 +56,14 @@ impl ReplState for InterpreterState {
fn repl_handler(input: &str, state: &mut InterpreterState) -> String { fn repl_handler(input: &str, state: &mut InterpreterState) -> String {
if state.show_tokens { if state.show_tokens {
println!("Tokens: {:?}", tokenize(input)) println!("Not implemented")
} }
if state.show_parse { if state.show_parse {
println!("Parse: {:?}", parse(tokenize(input))) println!("not implemented")
} }
let parse_result = parse(tokenize(input)); let tokens = tokenize(input);
match parse_result { let ast = parse(&tokens);
Ok(ast) => { format!("Changing how parsing works again")
format!("{}", evaluate(ast))
},
Err(err) => {
format!("Parse error: {:?}", err)
}
}
} }

View File

@ -1,222 +1,11 @@
use std::iter::Peekable;
use std::vec::IntoIter;
use std::fmt;
use tokenizer::Token; use tokenizer::Token;
#[derive(Debug)] #[derive(Debug)]
pub enum AST { pub struct ParseResult {
BinOp(Box<AST>, Box<AST>, Box<AST>), msg: i32
Number(f64),
Name(String),
Block(Vec<AST>),
Definition(String, Box<AST>),
} }
impl fmt::Display for AST { pub fn parse(tokens: &[Token]) -> ParseResult {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { ParseResult { msg: 0 }
&AST::Number(ref n) => write!(f, "{}", n),
&AST::Name(ref s) => write!(f, "{}", s),
astnode => write!(f, "UNEXPANDED AST NODE: {:?}", astnode)
}
}
} }
/// Error value returned when parsing fails; `err` holds the message text.
#[derive(Debug)]
pub struct ParseError {
err: String
}
/// Result alias for parsing code: `Ok(T)` on success, `ParseError` on failure.
pub type ParseResult<T> = Result<T, ParseError>;
/* grammar
program : block EOF
block : (statement sep)+
sep : NEWLINE | SEMICOLON
statement: expr | definition
definition: 'let' NAME '=' expr
expr : term ((PLUS|MINUS) term)*
term : factor ((MUL | DIV) factor)*
factor : NUM | LPAREN expr RPAREN
*/
/// Parser state: an owning, peekable iterator over the tokens not yet consumed.
struct Parser {
tokens: Peekable<IntoIter<Token>>
}
/// Expands to `Err(ParseError { .. })`, with the message built by passing the
/// macro's arguments straight through to `format!`.
macro_rules! parse_error {
    ($($fmt_args:tt)*) => {
        Err(ParseError { err: format!($($fmt_args)*) })
    };
}
impl Parser {
    /// Consumes and returns the next token, or `None` when none remain.
    fn next(&mut self) -> Option<Token> {
        self.tokens.next()
    }

    //TODO see if I can get rid of the need for this move
    /// Returns a clone of the upcoming token without consuming it.
    fn lookahead(&mut self) -> Option<Token> {
        self.tokens.peek().cloned()
    }
}
impl Parser {
fn expect(&mut self, expected: Token) -> ParseResult<()> {
match self.next() {
Some(ref next) if *next == expected => Ok(()),
Some(next) => {
return parse_error!("Expected {:?} but got {:?}", expected, next);
},
None => {
return parse_error!("Expected {:?} but got end of input", expected);
}
}
}
fn expect_identifier(&mut self) -> ParseResult<String> {
use tokenizer::Token::*;
match self.next() {
Some(Identifier(ref s)) => Ok(s.to_string()),
Some(next) => {
return parse_error!("Expected identifier but got {:?}", next);
}
None => {
return parse_error!("Expected identifier but got end of input");
}
}
}
fn expect_num_literal(&mut self) -> ParseResult<f64> {
use tokenizer::Token::*;
match self.next() {
Some(NumLiteral(f)) => Ok(f),
Some(t) => {
return parse_error!("Expected NumLiteral, but got {:?}", t);
},
None => {
return parse_error!("Expected NumLiteral but got end of input");
}
}
}
fn parse(&mut self) -> ParseResult<AST> {
let r = self.block();
try!(self.expect(Token::EOF));
r
}
fn block(&mut self) -> ParseResult<AST> {
use tokenizer::Token::*;
let mut block_nodes: Vec<AST> = Vec::new();
loop {
let s: AST = try!(self.statement());
block_nodes.push(s);
match self.lookahead() {
Some(Semicolon) | Some(Newline) => {
self.next();
if let Some(EOF) = self.lookahead() {
break
}
},
_ => break
}
}
Ok(AST::Block(block_nodes))
}
fn statement(&mut self) -> ParseResult<AST> {
use tokenizer::Token::*;
use tokenizer::Kw;
let r = match self.lookahead() {
Some(Keyword(Kw::Let)) => try!(self.definition()),
_ => try!(self.expr())
};
Ok(r)
}
fn definition(&mut self) -> ParseResult<AST> {
use tokenizer::Token::*;
use tokenizer::Kw;
try!(self.expect(Keyword(Kw::Let)));
let name = try!(self.expect_identifier());
match self.lookahead() {
Some(Keyword(Kw::Assign)) => { self.next(); },
_ => return parse_error!("Expected `=`"),
}
let expr = try!(self.expr());
Ok(AST::Definition(name, Box::new(expr)))
}
fn expr(&mut self) -> ParseResult<AST> {
use tokenizer::Token::*;
let mut lhs = try!(self.term());
loop {
match self.lookahead() {
Some(Identifier(ref s)) if s == "+" || s == "-" => {
let op_token = self.next().unwrap();
let op = AST::Name(match op_token { Identifier(s) => s, _ => panic!("lol") });
let rhs = try!(self.term());
lhs = AST::BinOp(
Box::new(lhs),
Box::new(op),
Box::new(rhs));
},
_ => break
}
}
Ok(lhs)
}
fn term(&mut self) -> ParseResult<AST> {
use tokenizer::Token::*;
let mut lhs = try!(self.factor());
loop {
match self.lookahead() {
Some(Identifier(ref s)) if s == "*" || s == "/" => {
let op_token = self.next().unwrap();
let op = AST::Name(match op_token { Identifier(s) => s, _ => panic!("lol") });
let rhs = try!(self.factor());
lhs = AST::BinOp(
Box::new(lhs),
Box::new(op),
Box::new(rhs));
},
_ => break
}
}
Ok(lhs)
}
fn factor(&mut self) -> ParseResult<AST> {
use tokenizer::Token::*;
match self.lookahead() {
Some(LParen) => {
self.next();
let expr = try!(self.expr());
try!(self.expect(RParen));
Ok(expr)
},
Some(NumLiteral(n)) => {
self.next();
Ok(AST::Number(n))
},
x => parse_error!("Expected LParen or NumLiteral, got {:?}", x )
}
}
}
/// Parses a token stream into an `AST`, consuming `input`.
///
/// Wraps the tokens in a peekable iterator and hands them to `Parser::parse`.
pub fn parse(input: Vec<Token>) -> ParseResult<AST> {
    Parser { tokens: input.into_iter().peekable() }.parse()
}

View File

@ -28,92 +28,9 @@ pub enum Kw {
pub fn tokenize(input: &str) -> Vec<Token> { pub fn tokenize(input: &str) -> Vec<Token> {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut iterator = input.chars().peekable();
/// Returns `true` when `c` is whitespace or one of `,` `;` `(` `)`.
fn ends_identifier(c: char) -> bool {
    c.is_whitespace() || c == ',' || c == ';' || c == '(' || c == ')'
}
// Scan the input one character at a time, pushing a token for each lexeme.
while let Some(c) = iterator.next() {
// Skip whitespace other than '\n'; a newline becomes its own token below.
if char::is_whitespace(c) && c != '\n' {
continue;
} else if c == '"' {
// String literal: collect every character up to the next '"'.
// There is no escape handling in this loop.
let mut buffer = String::with_capacity(20);
loop {
match iterator.next() {
Some(x) if x == '"' => break,
Some(x) => buffer.push(x),
// NOTE(review): an unterminated string returns the tokens gathered so
// far; the partial literal is dropped and no EOF token is appended.
None => return tokens,
}
}
tokens.push(Token::StrLiteral(buffer));
} else if c == '#' {
// Comment: discard characters through the end of the line. The '\n' is
// consumed here too, so no Newline token is pushed for it.
while let Some(x) = iterator.next() {
if x == '\n' {
break;
}
}
} else if c == ';' {
tokens.push(Token::Semicolon);
} else if c == '\n' {
tokens.push(Token::Newline);
} else if c == '(' {
tokens.push(Token::LParen);
} else if c == ')' {
tokens.push(Token::RParen);
} else if c == ',' {
tokens.push(Token::Comma);
} else if c == '.' {
// A '.' that starts a lexeme is always a Period token.
tokens.push(Token::Period);
} else {
// Anything else starts a word: accumulate characters until
// `ends_identifier` reports a delimiter (the delimiter is not consumed).
let mut buffer = String::with_capacity(20);
buffer.push(c);
while let Some(x) = iterator.peek().cloned() {
if ends_identifier(x) {
break;
}
// peek() just returned Some, so next() cannot be None here.
buffer.push(iterator.next().unwrap());
}
// A word that parses as f64 is a number literal; otherwise it is passed
// to `handle_identifier`.
match buffer.parse::<f64>() {
Ok(f) => tokens.push(Token::NumLiteral(f)),
_ => tokens.push(handle_identifier(buffer))
}
}
}
// Terminate the token stream with an explicit EOF token.
tokens.push(Token::EOF);
tokens tokens
} }
/// Classifies a scanned word: the reserved words listed below (and `=`)
/// become `Token::Keyword`; any other text becomes `Token::Identifier`.
fn handle_identifier(identifier: String) -> Token {
    let reserved = match &identifier[..] {
        "let" => Some(Kw::Let),
        "if" => Some(Kw::If),
        "then" => Some(Kw::Then),
        "else" => Some(Kw::Else),
        "while" => Some(Kw::While),
        "end" => Some(Kw::End),
        "fn" => Some(Kw::Fn),
        "null" => Some(Kw::Null),
        "=" => Some(Kw::Assign),
        _ => None,
    };
    match reserved {
        Some(kw) => Token::Keyword(kw),
        None => Token::Identifier(identifier),
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;