/// A lexical token produced by [`tokenize`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// End-of-input marker, pushed last once the whole input has been scanned.
    EOF,
    /// Statement separator: `;` or a newline. Consecutive separators collapse into one.
    Separator,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `.` when it appears at the start of a token.
    Period,
    /// A word that parsed as a floating-point number.
    NumLiteral(f64),
    /// The text between a pair of double quotes (no escape sequences are processed).
    StrLiteral(String),
    /// Any word that is neither a number nor a keyword.
    Identifier(String),
    /// A reserved word, see [`Kw`].
    Keyword(Kw),
}

/// Reserved words recognised by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Kw {
    If,
    Then,
    Else,
    While,
    End,
    Let,
    Fn,
    Null,
    /// The `=` sign, which is lexed as a keyword rather than an identifier.
    Assign,
}

/// Splits `input` into a flat list of [`Token`]s.
///
/// Scanning rules, applied character by character:
///
/// * Whitespace other than `'\n'` is skipped.
/// * `"` starts a string literal that runs to the next `"`.
/// * `#` starts a comment that runs to the end of the line. The terminating
///   newline is consumed with the comment, so it does not produce a
///   [`Token::Separator`].
/// * `;` and `'\n'` produce a [`Token::Separator`], unless the previous token
///   already is one.
/// * `(`, `)`, `,` and `.` produce their punctuation tokens.
/// * Anything else starts a word that extends up to (not including) the next
///   whitespace character, `,`, `;`, `(` or `)`. The word becomes a
///   [`Token::NumLiteral`] if it is a number, otherwise a keyword or identifier.
///   Operators are not delimiters, so `b*c` is a single identifier.
///
/// The result ends with [`Token::EOF`], with one exception: if the input ends
/// inside an unterminated string literal, the tokens collected so far are
/// returned as-is, without the partial string and without `EOF`.
pub fn tokenize(input: &str) -> Vec<Token> {
    /// Returns `true` for characters that terminate a word.
    fn ends_identifier(c: char) -> bool {
        match c {
            ',' | ';' | '(' | ')' => true,
            other => other.is_whitespace(),
        }
    }

    /// Parses `word` as a number, rejecting digit-less spellings.
    ///
    /// `f64`'s parser also accepts `inf`, `infinity` and `nan`; those must stay
    /// usable as ordinary identifiers, so a digit is required.
    fn parse_number(word: &str) -> Option<f64> {
        if word.chars().any(|c| c.is_ascii_digit()) {
            word.parse::<f64>().ok()
        } else {
            None
        }
    }

    let mut tokens = Vec::new();
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            ';' | '\n' => {
                // Collapse runs of separators into a single token.
                if tokens.last() != Some(&Token::Separator) {
                    tokens.push(Token::Separator);
                }
            }
            c if c.is_whitespace() => {}
            '"' => {
                let mut literal = String::with_capacity(20);
                loop {
                    match chars.next() {
                        Some('"') => break,
                        Some(x) => literal.push(x),
                        // Unterminated string: give back what was lexed so far.
                        None => return tokens,
                    }
                }
                tokens.push(Token::StrLiteral(literal));
            }
            '#' => {
                // Discard the rest of the line, including its newline.
                for x in chars.by_ref() {
                    if x == '\n' {
                        break;
                    }
                }
            }
            '(' => tokens.push(Token::LParen),
            ')' => tokens.push(Token::RParen),
            ',' => tokens.push(Token::Comma),
            '.' => tokens.push(Token::Period),
            _ => {
                let mut word = String::with_capacity(20);
                word.push(c);
                // Peek first so the delimiter stays available for the outer loop.
                while let Some(&x) = chars.peek() {
                    if ends_identifier(x) {
                        break;
                    }
                    word.push(x);
                    chars.next();
                }
                let token = match parse_number(&word) {
                    Some(n) => Token::NumLiteral(n),
                    None => handle_identifier(word),
                };
                tokens.push(token);
            }
        }
    }

    tokens.push(Token::EOF);
    tokens
}

/// Classifies a non-numeric word as a keyword or, failing that, an identifier.
fn handle_identifier(identifier: String) -> Token {
    let keyword = match identifier.as_str() {
        "let" => Kw::Let,
        "if" => Kw::If,
        "then" => Kw::Then,
        "else" => Kw::Else,
        "while" => Kw::While,
        "end" => Kw::End,
        "fn" => Kw::Fn,
        "null" => Kw::Null,
        "=" => Kw::Assign,
        _ => return Token::Identifier(identifier),
    };
    Token::Keyword(keyword)
}

#[cfg(test)]
mod tests {
    use super::*;

    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    // Tokens are compared by value so the tests do not depend on how `{:?}`
    // renders floating-point numbers.
    #[test]
    fn tokenization_tests() {
        let t1 = "let a = 3\n";
        assert_eq!(
            tokenize(t1),
            vec![
                Token::Keyword(Kw::Let),
                ident("a"),
                Token::Keyword(Kw::Assign),
                Token::NumLiteral(3.0),
                Token::Separator,
                Token::EOF,
            ]
        );

        // this is intentional: operators do not split words, so `b*c` is one identifier
        let t2 = "a + b*c\n";
        assert_eq!(
            tokenize(t2),
            vec![ident("a"), ident("+"), ident("b*c"), Token::Separator, Token::EOF]
        );
    }

    #[test]
    fn digitless_float_spellings_are_identifiers() {
        assert_eq!(tokenize("inf"), vec![ident("inf"), Token::EOF]);
        assert_eq!(tokenize("nan"), vec![ident("nan"), Token::EOF]);
    }
}