Run rustfmt on tokenizer code

This commit is contained in:
Greg Shuflin 2021-10-29 19:03:42 -07:00
parent 304df5c50e
commit 8111f69640

View File

@ -1,8 +1,13 @@
#![allow(clippy::upper_case_acronyms)]
use std::{
convert::{TryFrom, TryInto},
fmt,
iter::{Iterator, Peekable},
rc::Rc,
};
use itertools::Itertools;
use std::{iter::{Iterator, Peekable}, convert::TryFrom, rc::Rc, fmt};
use std::convert::TryInto;
/// A location in a particular source file. Note that the
/// sizes of the internal unsigned integer types limit
@ -22,25 +27,33 @@ impl fmt::Display for Location {
#[derive(Debug, PartialEq, Clone)]
pub enum TokenKind {
Newline, Semicolon,
Newline,
Semicolon,
LParen, RParen,
LSquareBracket, RSquareBracket,
LAngleBracket, RAngleBracket,
LCurlyBrace, RCurlyBrace,
Pipe, Backslash,
LParen,
RParen,
LSquareBracket,
RSquareBracket,
LAngleBracket,
RAngleBracket,
LCurlyBrace,
RCurlyBrace,
Pipe,
Backslash,
AtSign,
Comma, Period, Colon, Underscore,
Slash, Equals,
Comma,
Period,
Colon,
Underscore,
Slash,
Equals,
Operator(Rc<String>),
DigitGroup(Rc<String>), HexLiteral(Rc<String>), BinNumberSigil,
StrLiteral {
s: Rc<String>,
prefix: Option<Rc<String>>
},
DigitGroup(Rc<String>),
HexLiteral(Rc<String>),
BinNumberSigil,
StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
Identifier(Rc<String>),
Keyword(Kw),
@ -56,7 +69,7 @@ impl fmt::Display for TokenKind {
&Operator(ref s) => write!(f, "Operator({})", **s),
&DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
&HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
&StrLiteral {ref s, .. } => write!(f, "StrLiteral({})", s),
&StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s),
&Identifier(ref s) => write!(f, "Identifier({})", s),
&Error(ref s) => write!(f, "Error({})", s),
other => write!(f, "{:?}", other),
@ -66,17 +79,28 @@ impl fmt::Display for TokenKind {
// NOTE(review): several variants below are listed twice -- once in the old
// several-per-line layout and once in the one-per-line layout. Both spellings
// name the same variant.
/// Keyword kinds; a value of this enum is the payload of `TokenKind::Keyword`.
///
/// A `TryFrom<&str>` impl for this type follows the enum. Its body is not
/// visible here, so the source spelling that maps to each variant is not
/// documented in this comment.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kw {
If, Then, Else,
If,
Then,
Else,
Is,
Func,
For, While,
Const, Let, In,
For,
While,
Const,
Let,
In,
Mut,
Return,
Alias, Type, SelfType, SelfIdent,
Interface, Impl,
True, False,
Module, Import
Alias,
Type,
SelfType,
SelfIdent,
Interface,
Impl,
True,
False,
Module,
Import,
}
impl TryFrom<&str> for Kw {
@ -127,7 +151,8 @@ impl Token {
}
}
const OPERATOR_CHARS: [char; 17] = ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
const OPERATOR_CHARS: [char; 17] =
['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
/// Returns `true` when `c` is one of the characters in the `OPERATOR_CHARS` table.
fn is_operator(c: &char) -> bool {
    OPERATOR_CHARS.contains(c)
}
@ -138,9 +163,7 @@ pub fn tokenize(input: &str) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
.flat_map(|(line_idx, line)| {
line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch))
})
.flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch)))
.peekable();
while let Some((line_num, char_num, c)) = input.next() {
@ -153,7 +176,7 @@ pub fn tokenize(input: &str) -> Vec<Token> {
}
}
continue;
},
}
Some('*') => {
input.next();
let mut comment_level = 1;
@ -174,15 +197,20 @@ pub fn tokenize(input: &str) -> Vec<Token> {
} else {
continue;
}
},
_ => Slash
}
_ => Slash,
},
c if c.is_whitespace() && c != '\n' => continue,
'\n' => Newline, ';' => Semicolon,
':' => Colon, ',' => Comma,
'(' => LParen, ')' => RParen,
'{' => LCurlyBrace, '}' => RCurlyBrace,
'[' => LSquareBracket, ']' => RSquareBracket,
'\n' => Newline,
';' => Semicolon,
':' => Colon,
',' => Comma,
'(' => LParen,
')' => RParen,
'{' => LCurlyBrace,
'}' => RCurlyBrace,
'[' => LSquareBracket,
']' => RSquareBracket,
'"' => handle_quote(&mut input, None),
'\\' => Backslash,
'@' => AtSign,
@ -191,36 +219,43 @@ pub fn tokenize(input: &str) -> Vec<Token> {
c if is_operator(&c) => handle_operator(c, &mut input),
unknown => Error(format!("Unexpected character: {}", unknown)),
};
let location = Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
let location =
Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
tokens.push(Token { kind: cur_tok_kind, location });
}
tokens
}
// NOTE(review): the signature and two statements below appear twice -- the
// first occurrence is the old formatting, the second the rustfmt formatting of
// the same line.
/// Builds the token kind for a numeric token.
///
/// `c` is the token's first character; `input` yields the characters that
/// come after it, as tuples whose last element is the character.
///
/// * `0x...` -- the `x` is consumed, then every following hex digit or `_` is
///   collected into a `HexLiteral` (the `0x` prefix itself is not stored).
/// * `0b...` -- the `b` is consumed and `BinNumberSigil` is returned; whatever
///   follows the `b` is left in `input`.
/// * otherwise -- `c` plus the run of decimal digits after it becomes a
///   `DigitGroup`. Unlike the hex branch, `_` is not accepted here.
fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind {
fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
// Look one character ahead, without consuming it, to detect a `0x` / `0b` prefix.
let next_ch = input.peek().map(|&(_, _, c)| c);
if c == '0' && next_ch == Some('x') {
// Drop the `x`.
input.next();
// `peeking_take_while` (itertools) stops at the first character that fails
// the predicate and leaves that character in `input`.
let rest: String = input.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_').map(|(_, _, c)| { c }).collect();
let rest: String = input
.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
.map(|(_, _, c)| c)
.collect();
HexLiteral(Rc::new(rest))
} else if c == '0' && next_ch == Some('b') {
// Drop the `b`; no further characters are read in this branch.
input.next();
BinNumberSigil
} else {
// Plain decimal run, starting with `c` itself.
let mut buf = c.to_string();
buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| { c }));
buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
DigitGroup(Rc::new(buf))
}
}
fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix: Option<&str>) -> TokenKind {
fn handle_quote(
input: &mut Peekable<impl Iterator<Item = CharData>>,
quote_prefix: Option<&str>,
) -> TokenKind {
let mut buf = String::new();
loop {
match input.next().map(|(_, _, c)| { c }) {
match input.next().map(|(_, _, c)| c) {
Some('"') => break,
Some('\\') => {
let next = input.peek().map(|&(_, _, c)| { c });
let next = input.peek().map(|&(_, _, c)| c);
if next == Some('n') {
input.next();
buf.push('\n')
@ -231,7 +266,7 @@ fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix
input.next();
buf.push('\t');
}
},
}
Some(c) => buf.push(c),
None => return TokenKind::Error("Unclosed string".to_string()),
}
@ -239,24 +274,24 @@ fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix
TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
}
fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind {
fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
let mut buf = String::new();
buf.push(c);
let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
if c == '_' && next_is_alphabetic {
return TokenKind::Underscore
return TokenKind::Underscore;
}
loop {
match input.peek().map(|&(_, _, c)| { c }) {
match input.peek().map(|&(_, _, c)| c) {
Some(c) if c == '"' => {
input.next();
return handle_quote(input, Some(&buf));
},
}
Some(c) if c.is_alphanumeric() || c == '_' => {
input.next();
buf.push(c);
},
}
_ => break,
}
}
@ -267,11 +302,11 @@ fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>
}
}
fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind {
fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
match c {
'<' | '>' | '|' | '.' | '=' => {
let next = &input.peek().map(|&(_, _, c)| { c });
let next_is_op = next.map(|n| { is_operator(&n) }).unwrap_or(false);
let next = &input.peek().map(|&(_, _, c)| c);
let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
if !next_is_op {
return match c {
'<' => LAngleBracket,
@ -280,9 +315,9 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
'.' => Period,
'=' => Equals,
_ => unreachable!(),
};
}
}
},
_ => (),
};
@ -290,27 +325,27 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
if c == '`' {
loop {
match input.peek().map(|&(_, _, c)| { c }) {
match input.peek().map(|&(_, _, c)| c) {
Some(c) if c.is_alphabetic() || c == '_' => {
input.next();
buf.push(c);
},
}
Some('`') => {
input.next();
break;
},
_ => break
}
_ => break,
}
}
} else {
buf.push(c);
loop {
match input.peek().map(|&(_, _, c)| { c }) {
match input.peek().map(|&(_, _, c)| c) {
Some(c) if is_operator(&c) => {
input.next();
buf.push(c);
},
_ => break
}
_ => break,
}
}
}
@ -319,12 +354,23 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
#[cfg(test)]
mod schala_tokenizer_tests {
use super::*;
use super::Kw::*;
use super::{Kw::*, *};
// Test helpers that build an expected token kind from a string-like expression.
// NOTE(review): each macro is defined twice below -- first on a single line,
// then in multi-line layout; the expansions are identical.

// `digit!(x)` expands to `DigitGroup(Rc::new(x.to_string()))`.
macro_rules! digit { ($ident:expr) => { DigitGroup(Rc::new($ident.to_string())) } }
// `ident!(x)` expands to `Identifier(Rc::new(x.to_string()))`.
macro_rules! ident { ($ident:expr) => { Identifier(Rc::new($ident.to_string())) } }
// `op!(x)` expands to `Operator(Rc::new(x.to_string()))`.
macro_rules! op { ($ident:expr) => { Operator(Rc::new($ident.to_string())) } }
// Multi-line layout of `digit!`.
macro_rules! digit {
($ident:expr) => {
DigitGroup(Rc::new($ident.to_string()))
};
}
// Multi-line layout of `ident!`.
macro_rules! ident {
($ident:expr) => {
Identifier(Rc::new($ident.to_string()))
};
}
// Multi-line layout of `op!`.
macro_rules! op {
($ident:expr) => {
Operator(Rc::new($ident.to_string()))
};
}
fn token_kinds(input: &str) -> Vec<TokenKind> {
tokenize(input).into_iter().map(move |tok| tok.kind).collect()
@ -333,8 +379,22 @@ mod schala_tokenizer_tests {
// Tokenizes a `let` binding with a generic type annotation and checks the
// resulting token kinds. The expected list pins three things:
//   * `<` and `>` directly followed by a non-operator character come out as
//     `LAngleBracket` / `RAngleBracket`, not as `Operator`;
//   * a lone `=` comes out as `Equals`;
//   * `++` is a single `Operator` token.
// Spaces produce no tokens.
// NOTE(review): the assertion appears twice below (compact layout, then
// one-element-per-line layout); both compare against the same expected list.
#[test]
fn tokens() {
let output = token_kinds("let a: A<B> = c ++ d");
assert_eq!(output, vec![Keyword(Let), ident!("a"), Colon, ident!("A"),
LAngleBracket, ident!("B"), RAngleBracket, Equals, ident!("c"), op!("++"), ident!("d")]);
assert_eq!(
output,
vec![
Keyword(Let),
ident!("a"),
Colon,
ident!("A"),
LAngleBracket,
ident!("B"),
RAngleBracket,
Equals,
ident!("c"),
op!("++"),
ident!("d")
]
);
}
#[test]
@ -356,7 +416,17 @@ mod schala_tokenizer_tests {
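// TODO: not sure this is the behavior I want: the block comment ends at the
// first `*/`, so the stray `*/` after `bro` is tokenized as Operator("*")
// followed by Slash.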
let output = token_kinds("1 + /* hella */ bro */ 2");
assert_eq!(output, vec![digit!("1"), op!("+"), Identifier(Rc::new("bro".to_string())), Operator(Rc::new("*".to_string())), Slash, DigitGroup(Rc::new("2".to_string()))]);
assert_eq!(
output,
vec![
digit!("1"),
op!("+"),
Identifier(Rc::new("bro".to_string())),
Operator(Rc::new("*".to_string())),
Slash,
DigitGroup(Rc::new("2".to_string()))
]
);
}
#[test]
@ -371,9 +441,18 @@ mod schala_tokenizer_tests {
assert_eq!(output, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]);
let output = token_kinds(r#"b"some bytestring""#);
assert_eq!(output, vec![StrLiteral { s: Rc::new("some bytestring".to_string()), prefix: Some(Rc::new("b".to_string())) }]);
assert_eq!(
output,
vec![StrLiteral {
s: Rc::new("some bytestring".to_string()),
prefix: Some(Rc::new("b".to_string()))
}]
);
let output = token_kinds(r#""Do \n \" escapes work\t""#);
assert_eq!(output, vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]);
assert_eq!(
output,
vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]
);
}
}