From 13cde3106cee792229ac00d6145f3ff3c31bfe04 Mon Sep 17 00:00:00 2001 From: greg Date: Tue, 5 Jan 2016 22:00:29 -0800 Subject: [PATCH] Start making tokenizer changes Hopefully this time iron out all the bugs from the last implementation --- src/tokenizer.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 01511a6..77e8df3 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -7,6 +7,7 @@ pub enum Token { RParen, Comma, Period, + Colon, NumLiteral(f64), StrLiteral(String), Identifier(String), @@ -26,8 +27,53 @@ pub enum Kw { Assign } +fn is_digit(c: &char) -> bool { + c.is_digit(10) +} + pub fn tokenize(input: &str) -> Vec { + use self::Token::*; let mut tokens = Vec::new(); + let mut iter = input.chars().peekable(); + + while let Some(c) = iter.next() { + if char::is_whitespace(c) && c != '\n' { + continue; + } else if c == '#' { + while let Some(c) = iter.next() { + if c == '\n' { break; } + } + } + + let cur_tok = + if c == '\n' { + Newline + } else if c == ';' { + Semicolon + } else if c == '(' { + LParen + } else if c == ')' { + RParen + } else if c == ':' { + Colon + } else if c == '"' { + let mut buffer = String::with_capacity(20); + loop { + //TODO handle string escapes, interpolation + match iter.next() { + Some(x) if x == '"' => break, + Some(x) => buffer.push(x), + None => return tokens, + } + } + StrLiteral(buffer) + } else { + StrLiteral("DUMMY".to_string()) + }; + + tokens.push(cur_tok); + } + tokens }