From 1059a88ee64c3ecabce08c43a4678580b9bdefc9 Mon Sep 17 00:00:00 2001
From: greg
Date: Wed, 22 Jul 2015 03:12:01 -0700
Subject: [PATCH] Separate parsing into module

---
 src/main.rs      | 148 +----------------------------------------------
 src/parser.rs    |  78 +++++++++++++++++++++++++
 src/tokenizer.rs |  65 ++++++++++++++++++++-
 3 files changed, 145 insertions(+), 146 deletions(-)
 create mode 100644 src/parser.rs

diff --git a/src/main.rs b/src/main.rs
index 3e05f1e..fd3b36a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,12 @@
 use std::io;
 use std::io::Write;
 use std::io::BufRead;
-use std::char;
-use std::slice::Iter;
-use tokenizer::Token;
-use tokenizer::Token::*;
+use tokenizer::tokenize;
+use parser::{parse, ParseResult};
 
 mod tokenizer;
+mod parser;
 
 fn main() {
@@ -15,21 +14,6 @@ fn main() {
     repl();
 }
 
-
-#[derive(Debug)]
-enum AST {
-    Name(String),
-    LangString(String),
-    Number(f64),
-    BinOp(Box<AST>, Box<AST>, Box<AST>),
-    Binding(String, Box<AST>)
-}
-
-enum ParseResult {
-    Ok(AST),
-    Err(String)
-}
-
 fn repl() {
     let stdin = io::stdin();
     let mut stdout = io::stdout();
@@ -63,129 +47,3 @@ fn repl() {
         }
     }
 }
-
-
-fn tokenize(input: &str) -> Vec<Token> {
-    let mut tokens = Vec::new();
-    let mut iterator = input.chars().peekable();
-
-    fn ends_identifier(c: char) -> bool {
-        match c {
-            c if char::is_whitespace(c) => true,
-            ',' => true,
-            ';' => true,
-            '(' => true,
-            ')' => true,
-            _ => false
-        }
-    }
-
-    while let Some(c) = iterator.next() {
-        if char::is_whitespace(c) {
-            continue;
-        } else if c == '"' {
-
-            let mut buffer = String::with_capacity(20);
-            while let Some(x) = iterator.next() {
-                if x == '"' {
-                    break;
-                }
-                buffer.push(x);
-            }
-            tokens.push(Token::StrLiteral(buffer));
-
-        } else if c == '#' {
-            while let Some(x) = iterator.next() {
-                if x == '\n' {
-                    break;
-                }
-            }
-        } else if c == ';' || c == '\n' {
-            tokens.push(Token::Separator);
-        } else if c == '(' {
-            tokens.push(Token::LParen);
-        } else if c == ')' {
-            tokens.push(Token::RParen);
-        } else if c == ',' {
-            tokens.push(Token::Comma);
-        } else {
-            let mut buffer = String::with_capacity(20);
-            buffer.push(c);
-
-            while let Some(x) = iterator.peek().cloned() {
-                if ends_identifier(x) {
-                    break;
-                }
-                buffer.push(iterator.next().unwrap());
-            }
-
-            match buffer.parse::<f64>() {
-                Ok(f) => tokens.push(Token::NumLiteral(f)),
-                _ => tokens.push(Token::Identifier(buffer))
-            }
-        }
-    }
-    tokens.push(Token::EOF);
-    tokens
-}
-
-fn parse(input: Vec<Token>) -> ParseResult {
-
-    let mut tokens = input.iter();
-
-    if let ParseResult::Ok(ast) = let_expression(&mut tokens) {
-        if expect(EOF, &mut tokens) {
-            return ParseResult::Ok(ast);
-        }
-    }
-
-    return ParseResult::Err("Bad parse".to_string());
-}
-
-fn expect(tok: Token, tokens: &mut Iter<Token>) -> bool {
-    if let Some(n) = tokens.next() {
-        let next = (*n).clone();
-        return match (tok, next) {
-            (EOF, EOF) => true,
-            (Separator, Separator) => true,
-            (LParen, LParen) => true,
-            (RParen, RParen) => true,
-            (Comma, Comma) => true,
-            (NumLiteral(_), NumLiteral(_)) => true,
-            (StrLiteral(_), StrLiteral(_)) => true,
-            (Identifier(ref i1), Identifier(ref i2)) => i1 == i2,
-            _ => false
-        }
-    }
-
-    return false;
-}
-
-fn let_expression<'a>(input: &mut Iter<'a, Token>) -> ParseResult {
-    if expect(Identifier("let".to_string()), input) {
-        if let Some(&Identifier(ref name)) = input.next() {
-            if let Some(&Identifier(ref s)) = input.next() {
-                if s == "=" {
-                    let next = input.next();
-                    if let Some(&Identifier(ref value)) = next {
-                        let ast = AST::Binding(name.clone(), Box::new(AST::Name(value.clone())));
-                        return ParseResult::Ok(ast);
-                    }
-
-                    if let Some(&StrLiteral(ref value)) = next {
-                        let ast = AST::Binding(name.clone(), Box::new(AST::LangString(value.clone())));
-                        return ParseResult::Ok(ast);
-                    }
-
-                    if let Some(&NumLiteral(n)) = next {
-                        let ast = AST::Binding(name.clone(), Box::new(AST::Number(n)));
-                        return ParseResult::Ok(ast);
-                    }
-                }
-            }
-        }
-    }
-
-    return ParseResult::Err("Bad parse".to_string());
-}
-
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..0d8c9bd
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,78 @@
+use std::slice::Iter;
+
+use tokenizer::{Token};
+use tokenizer::Token::*;
+
+#[derive(Debug)]
+pub enum AST {
+    Name(String),
+    LangString(String),
+    Number(f64),
+    BinOp(Box<AST>, Box<AST>, Box<AST>),
+    Binding(String, Box<AST>)
+}
+
+pub enum ParseResult {
+    Ok(AST),
+    Err(String)
+}
+
+pub fn parse(input: Vec<Token>) -> ParseResult {
+
+    let mut tokens = input.iter();
+
+    if let ParseResult::Ok(ast) = let_expression(&mut tokens) {
+        if expect(EOF, &mut tokens) {
+            return ParseResult::Ok(ast);
+        }
+    }
+
+    return ParseResult::Err("Bad parse".to_string());
+}
+
+fn expect(tok: Token, tokens: &mut Iter<Token>) -> bool {
+    if let Some(n) = tokens.next() {
+        let next = (*n).clone();
+        return match (tok, next) {
+            (EOF, EOF) => true,
+            (Separator, Separator) => true,
+            (LParen, LParen) => true,
+            (RParen, RParen) => true,
+            (Comma, Comma) => true,
+            (NumLiteral(_), NumLiteral(_)) => true,
+            (StrLiteral(_), StrLiteral(_)) => true,
+            (Identifier(ref i1), Identifier(ref i2)) => i1 == i2,
+            _ => false
+        }
+    }
+
+    return false;
+}
+
+fn let_expression<'a>(input: &mut Iter<'a, Token>) -> ParseResult {
+    if expect(Identifier("let".to_string()), input) {
+        if let Some(&Identifier(ref name)) = input.next() {
+            if let Some(&Identifier(ref s)) = input.next() {
+                if s == "=" {
+                    let next = input.next();
+                    if let Some(&Identifier(ref value)) = next {
+                        let ast = AST::Binding(name.clone(), Box::new(AST::Name(value.clone())));
+                        return ParseResult::Ok(ast);
+                    }
+
+                    if let Some(&StrLiteral(ref value)) = next {
+                        let ast = AST::Binding(name.clone(), Box::new(AST::LangString(value.clone())));
+                        return ParseResult::Ok(ast);
+                    }
+
+                    if let Some(&NumLiteral(n)) = next {
+                        let ast = AST::Binding(name.clone(), Box::new(AST::Number(n)));
+                        return ParseResult::Ok(ast);
+                    }
+                }
+            }
+        }
+    }
+
+    return ParseResult::Err("Bad parse".to_string());
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 994f5be..f141311 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,4 +1,3 @@
-
 #[derive(Debug, Clone)]
 pub enum Token {
     EOF,
@@ -11,3 +10,67 @@ pub enum Token {
     Identifier(String)
     /* Keyword(Keyword) */ //implement in future
 }
+
+pub fn tokenize(input: &str) -> Vec<Token> {
+    let mut tokens = Vec::new();
+    let mut iterator = input.chars().peekable();
+
+    fn ends_identifier(c: char) -> bool {
+        match c {
+            c if char::is_whitespace(c) => true,
+            ',' => true,
+            ';' => true,
+            '(' => true,
+            ')' => true,
+            _ => false
+        }
+    }
+
+    while let Some(c) = iterator.next() {
+        if char::is_whitespace(c) {
+            continue;
+        } else if c == '"' {
+
+            let mut buffer = String::with_capacity(20);
+            while let Some(x) = iterator.next() {
+                if x == '"' {
+                    break;
+                }
+                buffer.push(x);
+            }
+            tokens.push(Token::StrLiteral(buffer));
+
+        } else if c == '#' {
+            while let Some(x) = iterator.next() {
+                if x == '\n' {
+                    break;
+                }
+            }
+        } else if c == ';' || c == '\n' {
+            tokens.push(Token::Separator);
+        } else if c == '(' {
+            tokens.push(Token::LParen);
+        } else if c == ')' {
+            tokens.push(Token::RParen);
+        } else if c == ',' {
+            tokens.push(Token::Comma);
+        } else {
+            let mut buffer = String::with_capacity(20);
+            buffer.push(c);
+
+            while let Some(x) = iterator.peek().cloned() {
+                if ends_identifier(x) {
+                    break;
+                }
+                buffer.push(iterator.next().unwrap());
+            }
+
+            match buffer.parse::<f64>() {
+                Ok(f) => tokens.push(Token::NumLiteral(f)),
+                _ => tokens.push(Token::Identifier(buffer))
+            }
+        }
+    }
+    tokens.push(Token::EOF);
+    tokens
+}
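
Note: after this split, main.rs only has to wire the two modules together. The hunks above do not show the full body of repl(), so the following is a minimal sketch, not the commit's actual code, of how a line of input could flow through the new modules. The helper name handle_line and the printed messages are invented for illustration; the use lines added to main.rs above (use tokenizer::tokenize; use parser::{parse, ParseResult};) are assumed to be in scope.

    // Hypothetical glue inside main.rs, relying only on items this patch makes public:
    // tokenize() from tokenizer.rs, and parse()/ParseResult from parser.rs.
    fn handle_line(input: &str) {
        let tokens = tokenize(input);          // Vec<Token>, terminated by Token::EOF
        match parse(tokens) {
            ParseResult::Ok(ast) => println!("{:?}", ast),        // AST derives Debug
            ParseResult::Err(msg) => println!("parse error: {}", msg),
        }
    }

Because ParseResult is a hand-rolled enum rather than std::result::Result, callers have to match on it explicitly as above; mapping it onto a standard Result would be a natural follow-up.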