From 09d524c74a2b59843a86abeacb1baf2e6f064d6f Mon Sep 17 00:00:00 2001
From: greg
Date: Mon, 4 Jan 2016 03:55:06 -0800
Subject: [PATCH] Changing how parsing works again

Someone wrote a port of the LLVM Kaleidoscope tutorial to Rust, namely
https://github.com/jauhien/iron-kaleidoscope

I'm just gonna follow along with it.
---
 src/evaluator.rs |   2 +
 src/main.rs      |  22 ++---
 src/parser.rs    | 221 ++---------------------------------------------
 src/tokenizer.rs |  83 ------------------
 4 files changed, 13 insertions(+), 315 deletions(-)

diff --git a/src/evaluator.rs b/src/evaluator.rs
index 0f2aa35..988612e 100644
--- a/src/evaluator.rs
+++ b/src/evaluator.rs
@@ -1,3 +1,4 @@
+/*
 use parser::AST;
 
 struct Evaluator {
@@ -100,3 +101,4 @@ mod tests {
         assert_eq!(output, "3");
     }
 }
+*/
diff --git a/src/main.rs b/src/main.rs
index 2fa22cd..d792f7b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,13 +11,9 @@ use simplerepl::{REPL, ReplState};
 use tokenizer::tokenize;
 mod tokenizer;
 
-use parser::{ParseResult, parse};
+use parser::parse;
 mod parser;
 
-use evaluator::{evaluate};
-mod evaluator;
-
-
 fn main() {
     let args: Vec<String> = std::env::args().collect();
     println!("Schala v 0.02");
@@ -60,20 +56,14 @@ impl ReplState for InterpreterState {
 
 fn repl_handler(input: &str, state: &mut InterpreterState) -> String {
     if state.show_tokens {
-        println!("Tokens: {:?}", tokenize(input))
+        println!("Not implemented")
     }
 
     if state.show_parse {
-        println!("Parse: {:?}", parse(tokenize(input)))
+        println!("not implemented")
     }
 
-    let parse_result = parse(tokenize(input));
-    match parse_result {
-        Ok(ast) => {
-            format!("{}", evaluate(ast))
-        },
-        Err(err) => {
-            format!("Parse error: {:?}", err)
-        }
-    }
+    let tokens = tokenize(input);
+    let ast = parse(&tokens);
+    format!("Changing how parsing works again")
 }
diff --git a/src/parser.rs b/src/parser.rs
index 1bc5191..5d523e0 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,222 +1,11 @@
-use std::iter::Peekable;
-use std::vec::IntoIter;
-use std::fmt;
-
 use tokenizer::Token;
 
 #[derive(Debug)]
-pub enum AST {
-    BinOp(Box<AST>, Box<AST>, Box<AST>),
-    Number(f64),
-    Name(String),
-    Block(Vec<AST>),
-    Definition(String, Box<AST>),
+pub struct ParseResult {
+    msg: i32
 }
 
-impl fmt::Display for AST {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            &AST::Number(ref n) => write!(f, "{}", n),
-            &AST::Name(ref s) => write!(f, "{}", s),
-            astnode => write!(f, "UNEXPANDED AST NODE: {:?}", astnode)
-        }
-    }
+pub fn parse(tokens: &[Token]) -> ParseResult {
+
+    ParseResult { msg: 0 }
 }
-
-#[derive(Debug)]
-pub struct ParseError {
-    err: String
-}
-
-pub type ParseResult<T> = Result<T, ParseError>;
-
-/* grammar
-
-   program : block EOF
-   block : (statement sep)+
-   sep : NEWLINE | SEMICOLON
-   statement: expr | definition
-   definition: 'let' NAME '=' expr
-   expr : term ((PLUS|MIMUS) term)*
-   term : factor ((MUL | DIV) factor)*
-   factor : NUM | LPAREN expr RPAREN
-
-*/
-
-struct Parser {
-    tokens: Peekable<IntoIter<Token>>
-}
-
-macro_rules! parse_error {
-    ($($text:tt)*) => {
-        Err(ParseError { err: format!($($text)*) })
-    }
-}
-
-impl Parser {
-    fn next(&mut self) -> Option<Token> {
-        self.tokens.next()
-    }
-
-    //TODO see if I can get rid of the need for this move
-    fn lookahead(&mut self) -> Option<Token> {
-        self.tokens.peek().map(|x| x.clone())
-    }
-}
-
-impl Parser {
-
-    fn expect(&mut self, expected: Token) -> ParseResult<()> {
-        match self.next() {
-            Some(ref next) if *next == expected => Ok(()),
-            Some(next) => {
-                return parse_error!("Expected {:?} but got {:?}", expected, next);
-            },
-            None => {
-                return parse_error!("Expected {:?} but got end of input", expected);
-            }
-        }
-    }
-
-
-    fn expect_identifier(&mut self) -> ParseResult<String> {
-        use tokenizer::Token::*;
-        match self.next() {
-            Some(Identifier(ref s)) => Ok(s.to_string()),
-            Some(next) => {
-                return parse_error!("Expected identifier but got {:?}", next);
-            }
-            None => {
-                return parse_error!("Expected identifier but got end of input");
-            }
-        }
-    }
-
-    fn expect_num_literal(&mut self) -> ParseResult<f64> {
-        use tokenizer::Token::*;
-        match self.next() {
-            Some(NumLiteral(f)) => Ok(f),
-            Some(t) => {
-                return parse_error!("Expected NumLiteral, but got {:?}", t);
-            },
-            None => {
-                return parse_error!("Expected NumLiteral but got end of input");
-            }
-        }
-    }
-
-    fn parse(&mut self) -> ParseResult<AST> {
-        let r = self.block();
-        try!(self.expect(Token::EOF));
-        r
-    }
-
-    fn block(&mut self) -> ParseResult<AST> {
-        use tokenizer::Token::*;
-        let mut block_nodes: Vec<AST> = Vec::new();
-        loop {
-            let s: AST = try!(self.statement());
-            block_nodes.push(s);
-            match self.lookahead() {
-                Some(Semicolon) | Some(Newline) => {
-                    self.next();
-                    if let Some(EOF) = self.lookahead() {
-                        break
-                    }
-                },
-                _ => break
-            }
-        }
-
-        Ok(AST::Block(block_nodes))
-    }
-
-    fn statement(&mut self) -> ParseResult<AST> {
-        use tokenizer::Token::*;
-        use tokenizer::Kw;
-        let r = match self.lookahead() {
-            Some(Keyword(Kw::Let)) => try!(self.definition()),
-            _ => try!(self.expr())
-        };
-        Ok(r)
-    }
-
-    fn definition(&mut self) -> ParseResult<AST> {
-        use tokenizer::Token::*;
-        use tokenizer::Kw;
-        try!(self.expect(Keyword(Kw::Let)));
-        let name = try!(self.expect_identifier());
-        match self.lookahead() {
-            Some(Keyword(Kw::Assign)) => { self.next(); },
-            _ => return parse_error!("Expected `=`"),
-        }
-
-        let expr = try!(self.expr());
-
-        Ok(AST::Definition(name, Box::new(expr)))
-    }
-
-    fn expr(&mut self) -> ParseResult<AST> {
-        use tokenizer::Token::*;
-        let mut lhs = try!(self.term());
-        loop {
-            match self.lookahead() {
-                Some(Identifier(ref s)) if s == "+" || s == "-" => {
-                    let op_token = self.next().unwrap();
-                    let op = AST::Name(match op_token { Identifier(s) => s, _ => panic!("lol") });
-                    let rhs = try!(self.term());
-                    lhs = AST::BinOp(
-                        Box::new(lhs),
-                        Box::new(op),
-                        Box::new(rhs));
-                },
-                _ => break
-            }
-        }
-        Ok(lhs)
-    }
-
-    fn term(&mut self) -> ParseResult<AST> {
-        use tokenizer::Token::*;
-        let mut lhs = try!(self.factor());
-        loop {
-            match self.lookahead() {
-                Some(Identifier(ref s)) if s == "*" || s == "/" => {
-                    let op_token = self.next().unwrap();
-                    let op = AST::Name(match op_token { Identifier(s) => s, _ => panic!("lol") });
-                    let rhs = try!(self.factor());
-                    lhs = AST::BinOp(
-                        Box::new(lhs),
-                        Box::new(op),
-                        Box::new(rhs));
-                },
-                _ => break
-            }
-        }
-        Ok(lhs)
-    }
-
-    fn factor(&mut self) -> ParseResult<AST> {
-        use tokenizer::Token::*;
-        match self.lookahead() {
-            Some(LParen) => {
-                self.next();
-                let expr = try!(self.expr());
-                try!(self.expect(RParen));
-                Ok(expr)
-            },
-            Some(NumLiteral(n)) => {
-                self.next();
-                Ok(AST::Number(n))
-            },
-            x => parse_error!("Expected LParen or NumLiteral, got {:?}", x )
-        }
-    }
-}
-
-pub fn parse(input: Vec<Token>) -> ParseResult<AST> {
-    let iter = input.into_iter().peekable();
-    let mut parser = Parser { tokens: iter };
-    return parser.parse();
-}
-
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index e07f258..01511a6 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -28,92 +28,9 @@ pub enum Kw {
 
 pub fn tokenize(input: &str) -> Vec<Token> {
     let mut tokens = Vec::new();
-    let mut iterator = input.chars().peekable();
-
-    fn ends_identifier(c: char) -> bool {
-        match c {
-            c if char::is_whitespace(c) => true,
-            ',' => true,
-            ';' => true,
-            '(' => true,
-            ')' => true,
-            _ => false
-        }
-    }
-
-    while let Some(c) = iterator.next() {
-
-        if char::is_whitespace(c) && c != '\n' {
-            continue;
-        } else if c == '"' {
-
-            let mut buffer = String::with_capacity(20);
-            loop {
-                match iterator.next() {
-                    Some(x) if x == '"' => break,
-                    Some(x) => buffer.push(x),
-                    None => return tokens,
-                }
-            }
-            tokens.push(Token::StrLiteral(buffer));
-
-        } else if c == '#' {
-            while let Some(x) = iterator.next() {
-                if x == '\n' {
-                    break;
-                }
-            }
-        } else if c == ';' {
-            tokens.push(Token::Semicolon);
-        } else if c == '\n' {
-            tokens.push(Token::Newline);
-        } else if c == '(' {
-            tokens.push(Token::LParen);
-        } else if c == ')' {
-            tokens.push(Token::RParen);
-        } else if c == ',' {
-            tokens.push(Token::Comma);
-        } else if c == '.' {
-            tokens.push(Token::Period);
-        } else {
-            let mut buffer = String::with_capacity(20);
-            buffer.push(c);
-
-            while let Some(x) = iterator.peek().cloned() {
-                if ends_identifier(x) {
-                    break;
-                }
-                buffer.push(iterator.next().unwrap());
-            }
-
-            match buffer.parse::<f64>() {
-                Ok(f) => tokens.push(Token::NumLiteral(f)),
-                _ => tokens.push(handle_identifier(buffer))
-            }
-        }
-    }
-    tokens.push(Token::EOF);
     tokens
 }
 
-fn handle_identifier(identifier: String) -> Token {
-
-    let keyword = match &identifier[..] {
-        "let" => Kw::Let,
-        "if" => Kw::If,
-        "then" => Kw::Then,
-        "else" => Kw::Else,
-        "while" => Kw::While,
-        "end" => Kw::End,
-        "fn" => Kw::Fn,
-        "null" => Kw::Null,
-        "=" => Kw::Assign,
-        _ => return Token::Identifier(identifier)
-    };
-
-    return Token::Keyword(keyword);
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;