Run rustfmt on tokenizer code
This commit is contained in:
parent
304df5c50e
commit
8111f69640
@ -1,8 +1,13 @@
|
||||
#![allow(clippy::upper_case_acronyms)]
|
||||
|
||||
use std::{
|
||||
convert::{TryFrom, TryInto},
|
||||
fmt,
|
||||
iter::{Iterator, Peekable},
|
||||
rc::Rc,
|
||||
};
|
||||
|
||||
use itertools::Itertools;
|
||||
use std::{iter::{Iterator, Peekable}, convert::TryFrom, rc::Rc, fmt};
|
||||
use std::convert::TryInto;
|
||||
|
||||
/// A location in a particular source file. Note that the
|
||||
/// sizes of the internal unsigned integer types limit
|
||||
@ -22,25 +27,33 @@ impl fmt::Display for Location {
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum TokenKind {
|
||||
Newline, Semicolon,
|
||||
Newline,
|
||||
Semicolon,
|
||||
|
||||
LParen, RParen,
|
||||
LSquareBracket, RSquareBracket,
|
||||
LAngleBracket, RAngleBracket,
|
||||
LCurlyBrace, RCurlyBrace,
|
||||
Pipe, Backslash,
|
||||
LParen,
|
||||
RParen,
|
||||
LSquareBracket,
|
||||
RSquareBracket,
|
||||
LAngleBracket,
|
||||
RAngleBracket,
|
||||
LCurlyBrace,
|
||||
RCurlyBrace,
|
||||
Pipe,
|
||||
Backslash,
|
||||
AtSign,
|
||||
|
||||
|
||||
Comma, Period, Colon, Underscore,
|
||||
Slash, Equals,
|
||||
Comma,
|
||||
Period,
|
||||
Colon,
|
||||
Underscore,
|
||||
Slash,
|
||||
Equals,
|
||||
|
||||
Operator(Rc<String>),
|
||||
DigitGroup(Rc<String>), HexLiteral(Rc<String>), BinNumberSigil,
|
||||
StrLiteral {
|
||||
s: Rc<String>,
|
||||
prefix: Option<Rc<String>>
|
||||
},
|
||||
DigitGroup(Rc<String>),
|
||||
HexLiteral(Rc<String>),
|
||||
BinNumberSigil,
|
||||
StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
|
||||
Identifier(Rc<String>),
|
||||
Keyword(Kw),
|
||||
|
||||
@ -66,17 +79,28 @@ impl fmt::Display for TokenKind {
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub enum Kw {
|
||||
If, Then, Else,
|
||||
If,
|
||||
Then,
|
||||
Else,
|
||||
Is,
|
||||
Func,
|
||||
For, While,
|
||||
Const, Let, In,
|
||||
For,
|
||||
While,
|
||||
Const,
|
||||
Let,
|
||||
In,
|
||||
Mut,
|
||||
Return,
|
||||
Alias, Type, SelfType, SelfIdent,
|
||||
Interface, Impl,
|
||||
True, False,
|
||||
Module, Import
|
||||
Alias,
|
||||
Type,
|
||||
SelfType,
|
||||
SelfIdent,
|
||||
Interface,
|
||||
Impl,
|
||||
True,
|
||||
False,
|
||||
Module,
|
||||
Import,
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for Kw {
|
||||
@ -127,7 +151,8 @@ impl Token {
|
||||
}
|
||||
}
|
||||
|
||||
const OPERATOR_CHARS: [char; 17] = ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
|
||||
const OPERATOR_CHARS: [char; 17] =
|
||||
['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
|
||||
fn is_operator(c: &char) -> bool {
|
||||
OPERATOR_CHARS.iter().any(|x| x == c)
|
||||
}
|
||||
@ -138,9 +163,7 @@ pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
let mut tokens: Vec<Token> = Vec::new();
|
||||
|
||||
let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
|
||||
.flat_map(|(line_idx, line)| {
|
||||
line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch))
|
||||
})
|
||||
.flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch)))
|
||||
.peekable();
|
||||
|
||||
while let Some((line_num, char_num, c)) = input.next() {
|
||||
@ -153,7 +176,7 @@ pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
}
|
||||
}
|
||||
continue;
|
||||
},
|
||||
}
|
||||
Some('*') => {
|
||||
input.next();
|
||||
let mut comment_level = 1;
|
||||
@ -174,15 +197,20 @@ pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
},
|
||||
_ => Slash
|
||||
}
|
||||
_ => Slash,
|
||||
},
|
||||
c if c.is_whitespace() && c != '\n' => continue,
|
||||
'\n' => Newline, ';' => Semicolon,
|
||||
':' => Colon, ',' => Comma,
|
||||
'(' => LParen, ')' => RParen,
|
||||
'{' => LCurlyBrace, '}' => RCurlyBrace,
|
||||
'[' => LSquareBracket, ']' => RSquareBracket,
|
||||
'\n' => Newline,
|
||||
';' => Semicolon,
|
||||
':' => Colon,
|
||||
',' => Comma,
|
||||
'(' => LParen,
|
||||
')' => RParen,
|
||||
'{' => LCurlyBrace,
|
||||
'}' => RCurlyBrace,
|
||||
'[' => LSquareBracket,
|
||||
']' => RSquareBracket,
|
||||
'"' => handle_quote(&mut input, None),
|
||||
'\\' => Backslash,
|
||||
'@' => AtSign,
|
||||
@ -191,7 +219,8 @@ pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
c if is_operator(&c) => handle_operator(c, &mut input),
|
||||
unknown => Error(format!("Unexpected character: {}", unknown)),
|
||||
};
|
||||
let location = Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
|
||||
let location =
|
||||
Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
|
||||
tokens.push(Token { kind: cur_tok_kind, location });
|
||||
}
|
||||
tokens
|
||||
@ -202,25 +231,31 @@ fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) ->
|
||||
|
||||
if c == '0' && next_ch == Some('x') {
|
||||
input.next();
|
||||
let rest: String = input.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_').map(|(_, _, c)| { c }).collect();
|
||||
let rest: String = input
|
||||
.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
|
||||
.map(|(_, _, c)| c)
|
||||
.collect();
|
||||
HexLiteral(Rc::new(rest))
|
||||
} else if c == '0' && next_ch == Some('b') {
|
||||
input.next();
|
||||
BinNumberSigil
|
||||
} else {
|
||||
let mut buf = c.to_string();
|
||||
buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| { c }));
|
||||
buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
|
||||
DigitGroup(Rc::new(buf))
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix: Option<&str>) -> TokenKind {
|
||||
fn handle_quote(
|
||||
input: &mut Peekable<impl Iterator<Item = CharData>>,
|
||||
quote_prefix: Option<&str>,
|
||||
) -> TokenKind {
|
||||
let mut buf = String::new();
|
||||
loop {
|
||||
match input.next().map(|(_, _, c)| { c }) {
|
||||
match input.next().map(|(_, _, c)| c) {
|
||||
Some('"') => break,
|
||||
Some('\\') => {
|
||||
let next = input.peek().map(|&(_, _, c)| { c });
|
||||
let next = input.peek().map(|&(_, _, c)| c);
|
||||
if next == Some('n') {
|
||||
input.next();
|
||||
buf.push('\n')
|
||||
@ -231,7 +266,7 @@ fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix
|
||||
input.next();
|
||||
buf.push('\t');
|
||||
}
|
||||
},
|
||||
}
|
||||
Some(c) => buf.push(c),
|
||||
None => return TokenKind::Error("Unclosed string".to_string()),
|
||||
}
|
||||
@ -244,19 +279,19 @@ fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>
|
||||
buf.push(c);
|
||||
let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
|
||||
if c == '_' && next_is_alphabetic {
|
||||
return TokenKind::Underscore
|
||||
return TokenKind::Underscore;
|
||||
}
|
||||
|
||||
loop {
|
||||
match input.peek().map(|&(_, _, c)| { c }) {
|
||||
match input.peek().map(|&(_, _, c)| c) {
|
||||
Some(c) if c == '"' => {
|
||||
input.next();
|
||||
return handle_quote(input, Some(&buf));
|
||||
},
|
||||
}
|
||||
Some(c) if c.is_alphanumeric() || c == '_' => {
|
||||
input.next();
|
||||
buf.push(c);
|
||||
},
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
@ -270,8 +305,8 @@ fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>
|
||||
fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
|
||||
match c {
|
||||
'<' | '>' | '|' | '.' | '=' => {
|
||||
let next = &input.peek().map(|&(_, _, c)| { c });
|
||||
let next_is_op = next.map(|n| { is_operator(&n) }).unwrap_or(false);
|
||||
let next = &input.peek().map(|&(_, _, c)| c);
|
||||
let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
|
||||
if !next_is_op {
|
||||
return match c {
|
||||
'<' => LAngleBracket,
|
||||
@ -280,9 +315,9 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
|
||||
'.' => Period,
|
||||
'=' => Equals,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
}
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
};
|
||||
|
||||
@ -290,27 +325,27 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
|
||||
|
||||
if c == '`' {
|
||||
loop {
|
||||
match input.peek().map(|&(_, _, c)| { c }) {
|
||||
match input.peek().map(|&(_, _, c)| c) {
|
||||
Some(c) if c.is_alphabetic() || c == '_' => {
|
||||
input.next();
|
||||
buf.push(c);
|
||||
},
|
||||
}
|
||||
Some('`') => {
|
||||
input.next();
|
||||
break;
|
||||
},
|
||||
_ => break
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
buf.push(c);
|
||||
loop {
|
||||
match input.peek().map(|&(_, _, c)| { c }) {
|
||||
match input.peek().map(|&(_, _, c)| c) {
|
||||
Some(c) if is_operator(&c) => {
|
||||
input.next();
|
||||
buf.push(c);
|
||||
},
|
||||
_ => break
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -319,12 +354,23 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
|
||||
|
||||
#[cfg(test)]
|
||||
mod schala_tokenizer_tests {
|
||||
use super::*;
|
||||
use super::Kw::*;
|
||||
use super::{Kw::*, *};
|
||||
|
||||
macro_rules! digit { ($ident:expr) => { DigitGroup(Rc::new($ident.to_string())) } }
|
||||
macro_rules! ident { ($ident:expr) => { Identifier(Rc::new($ident.to_string())) } }
|
||||
macro_rules! op { ($ident:expr) => { Operator(Rc::new($ident.to_string())) } }
|
||||
macro_rules! digit {
|
||||
($ident:expr) => {
|
||||
DigitGroup(Rc::new($ident.to_string()))
|
||||
};
|
||||
}
|
||||
macro_rules! ident {
|
||||
($ident:expr) => {
|
||||
Identifier(Rc::new($ident.to_string()))
|
||||
};
|
||||
}
|
||||
macro_rules! op {
|
||||
($ident:expr) => {
|
||||
Operator(Rc::new($ident.to_string()))
|
||||
};
|
||||
}
|
||||
|
||||
fn token_kinds(input: &str) -> Vec<TokenKind> {
|
||||
tokenize(input).into_iter().map(move |tok| tok.kind).collect()
|
||||
@ -333,8 +379,22 @@ mod schala_tokenizer_tests {
|
||||
#[test]
|
||||
fn tokens() {
|
||||
let output = token_kinds("let a: A<B> = c ++ d");
|
||||
assert_eq!(output, vec![Keyword(Let), ident!("a"), Colon, ident!("A"),
|
||||
LAngleBracket, ident!("B"), RAngleBracket, Equals, ident!("c"), op!("++"), ident!("d")]);
|
||||
assert_eq!(
|
||||
output,
|
||||
vec![
|
||||
Keyword(Let),
|
||||
ident!("a"),
|
||||
Colon,
|
||||
ident!("A"),
|
||||
LAngleBracket,
|
||||
ident!("B"),
|
||||
RAngleBracket,
|
||||
Equals,
|
||||
ident!("c"),
|
||||
op!("++"),
|
||||
ident!("d")
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -356,7 +416,17 @@ mod schala_tokenizer_tests {
|
||||
|
||||
//TODO not sure if I want this behavior
|
||||
let output = token_kinds("1 + /* hella */ bro */ 2");
|
||||
assert_eq!(output, vec![digit!("1"), op!("+"), Identifier(Rc::new("bro".to_string())), Operator(Rc::new("*".to_string())), Slash, DigitGroup(Rc::new("2".to_string()))]);
|
||||
assert_eq!(
|
||||
output,
|
||||
vec![
|
||||
digit!("1"),
|
||||
op!("+"),
|
||||
Identifier(Rc::new("bro".to_string())),
|
||||
Operator(Rc::new("*".to_string())),
|
||||
Slash,
|
||||
DigitGroup(Rc::new("2".to_string()))
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -371,9 +441,18 @@ mod schala_tokenizer_tests {
|
||||
assert_eq!(output, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]);
|
||||
|
||||
let output = token_kinds(r#"b"some bytestring""#);
|
||||
assert_eq!(output, vec![StrLiteral { s: Rc::new("some bytestring".to_string()), prefix: Some(Rc::new("b".to_string())) }]);
|
||||
assert_eq!(
|
||||
output,
|
||||
vec![StrLiteral {
|
||||
s: Rc::new("some bytestring".to_string()),
|
||||
prefix: Some(Rc::new("b".to_string()))
|
||||
}]
|
||||
);
|
||||
|
||||
let output = token_kinds(r#""Do \n \" escapes work\t""#);
|
||||
assert_eq!(output, vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]);
|
||||
assert_eq!(
|
||||
output,
|
||||
vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user