2015-12-25 02:03:11 -08:00
|
|
|
#[derive(Debug, Clone, PartialEq)]
|
2015-07-22 03:02:55 -07:00
|
|
|
pub enum Token {
|
|
|
|
EOF,
|
2015-12-31 22:20:59 -08:00
|
|
|
Newline,
|
|
|
|
Semicolon,
|
2015-07-22 03:02:55 -07:00
|
|
|
LParen,
|
|
|
|
RParen,
|
|
|
|
Comma,
|
2015-07-26 01:51:15 -07:00
|
|
|
Period,
|
2016-01-05 22:00:29 -08:00
|
|
|
Colon,
|
2015-07-22 03:02:55 -07:00
|
|
|
NumLiteral(f64),
|
|
|
|
StrLiteral(String),
|
2015-07-22 04:01:56 -07:00
|
|
|
Identifier(String),
|
|
|
|
Keyword(Kw)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Reserved words carried by `Token::Keyword`.
///
/// NOTE(review): the tokenizer visible in this file only ever emits `If` and
/// `Then`; the remaining variants are presumably reserved for words handled
/// elsewhere or later — confirm before relying on them.
#[derive(Clone, Debug, PartialEq)]
pub enum Kw {
    /// The word `if`.
    If,
    /// The word `then`.
    Then,
    Else,
    While,
    End,
    Let,
    Fn,
    Null,
    Assign,
}
|
2015-07-22 03:12:01 -07:00
|
|
|
|
2016-01-05 22:00:29 -08:00
|
|
|
/// Returns `true` when `c` is a base-10 digit (`'0'` through `'9'`).
///
/// Takes a reference so it can be applied directly to the `&char` handed out
/// by `Peekable::peek`.
fn is_digit(c: &char) -> bool {
    c.to_digit(10).is_some()
}
|
|
|
|
|
2016-01-07 02:25:32 -08:00
|
|
|
fn ends_identifier(c: &char) -> bool {
|
|
|
|
let c = *c;
|
|
|
|
char::is_whitespace(c) ||
|
|
|
|
is_digit(&c) ||
|
|
|
|
c == ';' ||
|
|
|
|
c == '(' ||
|
|
|
|
c == ')' ||
|
|
|
|
c == ',' ||
|
|
|
|
c == '.' ||
|
|
|
|
c == ':'
|
|
|
|
}
|
|
|
|
|
2016-01-06 23:48:53 -08:00
|
|
|
pub fn tokenize(input: &str) -> Option<Vec<Token>> {
|
2016-01-05 22:00:29 -08:00
|
|
|
use self::Token::*;
|
2015-07-22 03:12:01 -07:00
|
|
|
let mut tokens = Vec::new();
|
2016-01-05 22:00:29 -08:00
|
|
|
let mut iter = input.chars().peekable();
|
|
|
|
|
|
|
|
while let Some(c) = iter.next() {
|
|
|
|
if char::is_whitespace(c) && c != '\n' {
|
|
|
|
continue;
|
|
|
|
} else if c == '#' {
|
|
|
|
while let Some(c) = iter.next() {
|
|
|
|
if c == '\n' { break; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let cur_tok =
|
|
|
|
if c == '\n' {
|
|
|
|
Newline
|
|
|
|
} else if c == ';' {
|
|
|
|
Semicolon
|
|
|
|
} else if c == '(' {
|
|
|
|
LParen
|
|
|
|
} else if c == ')' {
|
|
|
|
RParen
|
|
|
|
} else if c == ':' {
|
|
|
|
Colon
|
|
|
|
} else if c == '"' {
|
|
|
|
let mut buffer = String::with_capacity(20);
|
|
|
|
loop {
|
|
|
|
//TODO handle string escapes, interpolation
|
|
|
|
match iter.next() {
|
|
|
|
Some(x) if x == '"' => break,
|
|
|
|
Some(x) => buffer.push(x),
|
2016-01-06 23:48:53 -08:00
|
|
|
None => return None,
|
2016-01-05 22:00:29 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
StrLiteral(buffer)
|
2016-01-07 02:25:32 -08:00
|
|
|
} else if c == '.' && !iter.peek().map_or(false, |x| is_digit(x)) {
|
|
|
|
Period
|
|
|
|
} else if is_digit(&c) || c == '.' {
|
2016-01-07 01:09:18 -08:00
|
|
|
let mut buffer = String::with_capacity(20);
|
|
|
|
buffer.push(c);
|
|
|
|
loop {
|
|
|
|
if iter.peek().map_or(false, |x| is_digit(x) || *x == '.') {
|
|
|
|
let n = iter.next().unwrap();
|
|
|
|
buffer.push(n);
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
match buffer.parse::<f64>() {
|
|
|
|
Ok(f) => NumLiteral(f),
|
|
|
|
Err(_) => return None
|
|
|
|
}
|
2016-01-05 22:00:29 -08:00
|
|
|
} else {
|
2016-01-07 02:25:32 -08:00
|
|
|
let mut buffer = String::with_capacity(20);
|
|
|
|
buffer.push(c);
|
|
|
|
loop {
|
|
|
|
if iter.peek().map_or(false, |x| ends_identifier(x)) {
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
buffer.push(iter.next().unwrap());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
match &buffer[..] {
|
|
|
|
"if" => Keyword(Kw::If),
|
|
|
|
"then" => Keyword(Kw::Then),
|
|
|
|
b => Identifier(b.to_string())
|
|
|
|
}
|
2016-01-05 22:00:29 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
tokens.push(cur_tok);
|
|
|
|
}
|
|
|
|
|
2016-01-08 23:59:15 -08:00
|
|
|
tokens.push(EOF);
|
|
|
|
|
2016-01-06 23:48:53 -08:00
|
|
|
Some(tokens)
|
2015-07-22 03:12:01 -07:00
|
|
|
}
|
2015-07-22 04:01:56 -07:00
|
|
|
|
2015-12-20 17:03:03 -08:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the token stream for a simple `let` line.
    ///
    /// Tokens are compared structurally through `PartialEq` instead of through
    /// their `Debug` text, so the test does not depend on how floats are
    /// rendered by `{:?}` (`3` vs `3.0`).
    #[test]
    fn tokeniziation_tests() {
        let input1 = "let a = 3\n";
        let token1 = tokenize(input1).unwrap();
        assert_eq!(
            token1,
            vec![
                Token::Identifier("let".to_string()),
                Token::Identifier("a".to_string()),
                Token::Identifier("=".to_string()),
                Token::NumLiteral(3.0),
                Token::Newline,
                Token::EOF,
            ]
        );
    }
}
|