Changing how parsing works again
Someone wrote a port of the LLVM Kaleidoscope tutorial to Rust, namely https://github.com/jauhien/iron-kaleidoscope. I'm just going to follow along with it.
This commit is contained in:
parent
61c36c4def
commit
09d524c74a
@ -1,3 +1,4 @@
|
||||
/*
|
||||
use parser::AST;
|
||||
|
||||
struct Evaluator {
|
||||
@ -100,3 +101,4 @@ mod tests {
|
||||
assert_eq!(output, "3");
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
22
src/main.rs
22
src/main.rs
@ -11,13 +11,9 @@ use simplerepl::{REPL, ReplState};
|
||||
use tokenizer::tokenize;
|
||||
mod tokenizer;
|
||||
|
||||
use parser::{ParseResult, parse};
|
||||
use parser::parse;
|
||||
mod parser;
|
||||
|
||||
use evaluator::{evaluate};
|
||||
mod evaluator;
|
||||
|
||||
|
||||
fn main() {
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
println!("Schala v 0.02");
|
||||
@ -60,20 +56,14 @@ impl ReplState for InterpreterState {
|
||||
|
||||
fn repl_handler(input: &str, state: &mut InterpreterState) -> String {
|
||||
if state.show_tokens {
|
||||
println!("Tokens: {:?}", tokenize(input))
|
||||
println!("Not implemented")
|
||||
}
|
||||
|
||||
if state.show_parse {
|
||||
println!("Parse: {:?}", parse(tokenize(input)))
|
||||
println!("not implemented")
|
||||
}
|
||||
|
||||
let parse_result = parse(tokenize(input));
|
||||
match parse_result {
|
||||
Ok(ast) => {
|
||||
format!("{}", evaluate(ast))
|
||||
},
|
||||
Err(err) => {
|
||||
format!("Parse error: {:?}", err)
|
||||
}
|
||||
}
|
||||
let tokens = tokenize(input);
|
||||
let ast = parse(&tokens);
|
||||
format!("Changing how parsing works again")
|
||||
}
|
||||
|
219
src/parser.rs
219
src/parser.rs
@ -1,222 +1,11 @@
|
||||
use std::iter::Peekable;
|
||||
use std::vec::IntoIter;
|
||||
use std::fmt;
|
||||
|
||||
use tokenizer::Token;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum AST {
|
||||
BinOp(Box<AST>, Box<AST>, Box<AST>),
|
||||
Number(f64),
|
||||
Name(String),
|
||||
Block(Vec<AST>),
|
||||
Definition(String, Box<AST>),
|
||||
pub struct ParseResult {
|
||||
msg: i32
|
||||
}
|
||||
|
||||
impl fmt::Display for AST {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
&AST::Number(ref n) => write!(f, "{}", n),
|
||||
&AST::Name(ref s) => write!(f, "{}", s),
|
||||
astnode => write!(f, "UNEXPANDED AST NODE: {:?}", astnode)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn parse(tokens: &[Token]) -> ParseResult {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ParseError {
|
||||
err: String
|
||||
ParseResult { msg: 0 }
|
||||
}
|
||||
|
||||
pub type ParseResult<T> = Result<T, ParseError>;
|
||||
|
||||
/* grammar
|
||||
|
||||
program : block EOF
|
||||
block : (statement sep)+
|
||||
sep : NEWLINE | SEMICOLON
|
||||
statement: expr | definition
|
||||
definition: 'let' NAME '=' expr
|
||||
expr : term ((PLUS|MINUS) term)*
|
||||
term : factor ((MUL | DIV) factor)*
|
||||
factor : NUM | LPAREN expr RPAREN
|
||||
|
||||
*/
|
||||
|
||||
struct Parser {
|
||||
tokens: Peekable<IntoIter<Token>>
|
||||
}
|
||||
|
||||
macro_rules! parse_error {
|
||||
($($text:tt)*) => {
|
||||
Err(ParseError { err: format!($($text)*) })
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
fn next(&mut self) -> Option<Token> {
|
||||
self.tokens.next()
|
||||
}
|
||||
|
||||
//TODO see if I can get rid of the need for this move
|
||||
fn lookahead(&mut self) -> Option<Token> {
|
||||
self.tokens.peek().map(|x| x.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
|
||||
fn expect(&mut self, expected: Token) -> ParseResult<()> {
|
||||
match self.next() {
|
||||
Some(ref next) if *next == expected => Ok(()),
|
||||
Some(next) => {
|
||||
return parse_error!("Expected {:?} but got {:?}", expected, next);
|
||||
},
|
||||
None => {
|
||||
return parse_error!("Expected {:?} but got end of input", expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn expect_identifier(&mut self) -> ParseResult<String> {
|
||||
use tokenizer::Token::*;
|
||||
match self.next() {
|
||||
Some(Identifier(ref s)) => Ok(s.to_string()),
|
||||
Some(next) => {
|
||||
return parse_error!("Expected identifier but got {:?}", next);
|
||||
}
|
||||
None => {
|
||||
return parse_error!("Expected identifier but got end of input");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn expect_num_literal(&mut self) -> ParseResult<f64> {
|
||||
use tokenizer::Token::*;
|
||||
match self.next() {
|
||||
Some(NumLiteral(f)) => Ok(f),
|
||||
Some(t) => {
|
||||
return parse_error!("Expected NumLiteral, but got {:?}", t);
|
||||
},
|
||||
None => {
|
||||
return parse_error!("Expected NumLiteral but got end of input");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(&mut self) -> ParseResult<AST> {
|
||||
let r = self.block();
|
||||
try!(self.expect(Token::EOF));
|
||||
r
|
||||
}
|
||||
|
||||
fn block(&mut self) -> ParseResult<AST> {
|
||||
use tokenizer::Token::*;
|
||||
let mut block_nodes: Vec<AST> = Vec::new();
|
||||
loop {
|
||||
let s: AST = try!(self.statement());
|
||||
block_nodes.push(s);
|
||||
match self.lookahead() {
|
||||
Some(Semicolon) | Some(Newline) => {
|
||||
self.next();
|
||||
if let Some(EOF) = self.lookahead() {
|
||||
break
|
||||
}
|
||||
},
|
||||
_ => break
|
||||
}
|
||||
}
|
||||
|
||||
Ok(AST::Block(block_nodes))
|
||||
}
|
||||
|
||||
fn statement(&mut self) -> ParseResult<AST> {
|
||||
use tokenizer::Token::*;
|
||||
use tokenizer::Kw;
|
||||
let r = match self.lookahead() {
|
||||
Some(Keyword(Kw::Let)) => try!(self.definition()),
|
||||
_ => try!(self.expr())
|
||||
};
|
||||
Ok(r)
|
||||
}
|
||||
|
||||
fn definition(&mut self) -> ParseResult<AST> {
|
||||
use tokenizer::Token::*;
|
||||
use tokenizer::Kw;
|
||||
try!(self.expect(Keyword(Kw::Let)));
|
||||
let name = try!(self.expect_identifier());
|
||||
match self.lookahead() {
|
||||
Some(Keyword(Kw::Assign)) => { self.next(); },
|
||||
_ => return parse_error!("Expected `=`"),
|
||||
}
|
||||
|
||||
let expr = try!(self.expr());
|
||||
|
||||
Ok(AST::Definition(name, Box::new(expr)))
|
||||
}
|
||||
|
||||
fn expr(&mut self) -> ParseResult<AST> {
|
||||
use tokenizer::Token::*;
|
||||
let mut lhs = try!(self.term());
|
||||
loop {
|
||||
match self.lookahead() {
|
||||
Some(Identifier(ref s)) if s == "+" || s == "-" => {
|
||||
let op_token = self.next().unwrap();
|
||||
let op = AST::Name(match op_token { Identifier(s) => s, _ => panic!("lol") });
|
||||
let rhs = try!(self.term());
|
||||
lhs = AST::BinOp(
|
||||
Box::new(lhs),
|
||||
Box::new(op),
|
||||
Box::new(rhs));
|
||||
},
|
||||
_ => break
|
||||
}
|
||||
}
|
||||
Ok(lhs)
|
||||
}
|
||||
|
||||
fn term(&mut self) -> ParseResult<AST> {
|
||||
use tokenizer::Token::*;
|
||||
let mut lhs = try!(self.factor());
|
||||
loop {
|
||||
match self.lookahead() {
|
||||
Some(Identifier(ref s)) if s == "*" || s == "/" => {
|
||||
let op_token = self.next().unwrap();
|
||||
let op = AST::Name(match op_token { Identifier(s) => s, _ => panic!("lol") });
|
||||
let rhs = try!(self.factor());
|
||||
lhs = AST::BinOp(
|
||||
Box::new(lhs),
|
||||
Box::new(op),
|
||||
Box::new(rhs));
|
||||
},
|
||||
_ => break
|
||||
}
|
||||
}
|
||||
Ok(lhs)
|
||||
}
|
||||
|
||||
fn factor(&mut self) -> ParseResult<AST> {
|
||||
use tokenizer::Token::*;
|
||||
match self.lookahead() {
|
||||
Some(LParen) => {
|
||||
self.next();
|
||||
let expr = try!(self.expr());
|
||||
try!(self.expect(RParen));
|
||||
Ok(expr)
|
||||
},
|
||||
Some(NumLiteral(n)) => {
|
||||
self.next();
|
||||
Ok(AST::Number(n))
|
||||
},
|
||||
x => parse_error!("Expected LParen or NumLiteral, got {:?}", x )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(input: Vec<Token>) -> ParseResult<AST> {
|
||||
let iter = input.into_iter().peekable();
|
||||
let mut parser = Parser { tokens: iter };
|
||||
return parser.parse();
|
||||
}
|
||||
|
||||
|
@ -28,92 +28,9 @@ pub enum Kw {
|
||||
|
||||
pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut iterator = input.chars().peekable();
|
||||
|
||||
fn ends_identifier(c: char) -> bool {
|
||||
match c {
|
||||
c if char::is_whitespace(c) => true,
|
||||
',' => true,
|
||||
';' => true,
|
||||
'(' => true,
|
||||
')' => true,
|
||||
_ => false
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(c) = iterator.next() {
|
||||
|
||||
if char::is_whitespace(c) && c != '\n' {
|
||||
continue;
|
||||
} else if c == '"' {
|
||||
|
||||
let mut buffer = String::with_capacity(20);
|
||||
loop {
|
||||
match iterator.next() {
|
||||
Some(x) if x == '"' => break,
|
||||
Some(x) => buffer.push(x),
|
||||
None => return tokens,
|
||||
}
|
||||
}
|
||||
tokens.push(Token::StrLiteral(buffer));
|
||||
|
||||
} else if c == '#' {
|
||||
while let Some(x) = iterator.next() {
|
||||
if x == '\n' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if c == ';' {
|
||||
tokens.push(Token::Semicolon);
|
||||
} else if c == '\n' {
|
||||
tokens.push(Token::Newline);
|
||||
} else if c == '(' {
|
||||
tokens.push(Token::LParen);
|
||||
} else if c == ')' {
|
||||
tokens.push(Token::RParen);
|
||||
} else if c == ',' {
|
||||
tokens.push(Token::Comma);
|
||||
} else if c == '.' {
|
||||
tokens.push(Token::Period);
|
||||
} else {
|
||||
let mut buffer = String::with_capacity(20);
|
||||
buffer.push(c);
|
||||
|
||||
while let Some(x) = iterator.peek().cloned() {
|
||||
if ends_identifier(x) {
|
||||
break;
|
||||
}
|
||||
buffer.push(iterator.next().unwrap());
|
||||
}
|
||||
|
||||
match buffer.parse::<f64>() {
|
||||
Ok(f) => tokens.push(Token::NumLiteral(f)),
|
||||
_ => tokens.push(handle_identifier(buffer))
|
||||
}
|
||||
}
|
||||
}
|
||||
tokens.push(Token::EOF);
|
||||
tokens
|
||||
}
|
||||
|
||||
fn handle_identifier(identifier: String) -> Token {
|
||||
|
||||
let keyword = match &identifier[..] {
|
||||
"let" => Kw::Let,
|
||||
"if" => Kw::If,
|
||||
"then" => Kw::Then,
|
||||
"else" => Kw::Else,
|
||||
"while" => Kw::While,
|
||||
"end" => Kw::End,
|
||||
"fn" => Kw::Fn,
|
||||
"null" => Kw::Null,
|
||||
"=" => Kw::Assign,
|
||||
_ => return Token::Identifier(identifier)
|
||||
};
|
||||
|
||||
return Token::Keyword(keyword);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
Loading…
Reference in New Issue
Block a user