Add tokenization for string literal prefixes

This commit is contained in:
greg 2019-11-05 02:22:11 -08:00
parent efc8497235
commit d20acf7166
2 changed files with 23 additions and 7 deletions

View File

@ -991,7 +991,7 @@ impl Parser {
self.token_handler.next(); self.token_handler.next();
Pattern::Literal(PatternLiteral::BoolPattern(false)) Pattern::Literal(PatternLiteral::BoolPattern(false))
}, },
StrLiteral(s) => { StrLiteral { s, .. } => {
self.token_handler.next(); self.token_handler.next();
Pattern::Literal(PatternLiteral::StringPattern(s)) Pattern::Literal(PatternLiteral::StringPattern(s))
}, },
@ -1140,7 +1140,7 @@ impl Parser {
let id = self.id_store.fresh(); let id = self.id_store.fresh();
Ok(Expression::new(id, BoolLiteral(false))) Ok(Expression::new(id, BoolLiteral(false)))
}, },
StrLiteral(s) => { StrLiteral {s, ..} => {
self.token_handler.next(); self.token_handler.next();
let id = self.id_store.fresh(); let id = self.id_store.fresh();
Ok(Expression::new(id, StringLiteral(s.clone()))) Ok(Expression::new(id, StringLiteral(s.clone())))

View File

@ -21,7 +21,10 @@ pub enum TokenKind {
Operator(Rc<String>), Operator(Rc<String>),
DigitGroup(Rc<String>), HexLiteral(Rc<String>), BinNumberSigil, DigitGroup(Rc<String>), HexLiteral(Rc<String>), BinNumberSigil,
StrLiteral(Rc<String>), StrLiteral {
s: Rc<String>,
prefix: Option<Rc<String>>
},
Identifier(Rc<String>), Identifier(Rc<String>),
Keyword(Kw), Keyword(Kw),
@ -37,7 +40,7 @@ impl fmt::Display for TokenKind {
&Operator(ref s) => write!(f, "Operator({})", **s), &Operator(ref s) => write!(f, "Operator({})", **s),
&DigitGroup(ref s) => write!(f, "DigitGroup({})", s), &DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
&HexLiteral(ref s) => write!(f, "HexLiteral({})", s), &HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
&StrLiteral(ref s) => write!(f, "StrLiteral({})", s), &StrLiteral {ref s, .. } => write!(f, "StrLiteral({})", s),
&Identifier(ref s) => write!(f, "Identifier({})", s), &Identifier(ref s) => write!(f, "Identifier({})", s),
&Error(ref s) => write!(f, "Error({})", s), &Error(ref s) => write!(f, "Error({})", s),
other => write!(f, "{:?}", other), other => write!(f, "{:?}", other),
@ -163,7 +166,7 @@ pub fn tokenize(input: &str) -> Vec<Token> {
'(' => LParen, ')' => RParen, '(' => LParen, ')' => RParen,
'{' => LCurlyBrace, '}' => RCurlyBrace, '{' => LCurlyBrace, '}' => RCurlyBrace,
'[' => LSquareBracket, ']' => RSquareBracket, '[' => LSquareBracket, ']' => RSquareBracket,
'"' => handle_quote(&mut input), '"' => handle_quote(&mut input, None),
'\\' => Backslash, '\\' => Backslash,
c if c.is_digit(10) => handle_digit(c, &mut input), c if c.is_digit(10) => handle_digit(c, &mut input),
c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input), c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input),
@ -191,7 +194,7 @@ fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) ->
} }
} }
fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind { fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix: Option<&str>) -> TokenKind {
let mut buf = String::new(); let mut buf = String::new();
loop { loop {
match input.next().map(|(_, _, c)| { c }) { match input.next().map(|(_, _, c)| { c }) {
@ -213,7 +216,7 @@ fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind
None => return TokenKind::Error(format!("Unclosed string")), None => return TokenKind::Error(format!("Unclosed string")),
} }
} }
TokenKind::StrLiteral(Rc::new(buf)) TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
} }
fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind { fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind {
@ -225,6 +228,10 @@ fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>
loop { loop {
match input.peek().map(|&(_, _, c)| { c }) { match input.peek().map(|&(_, _, c)| { c }) {
Some(c) if c == '"' => {
input.next();
return handle_quote(input, Some(&buf));
},
Some(c) if c.is_alphanumeric() || c == '_' => { Some(c) if c.is_alphanumeric() || c == '_' => {
input.next(); input.next();
buf.push(c); buf.push(c);
@ -325,4 +332,13 @@ mod schala_tokenizer_tests {
let token_kinds: Vec<TokenKind> = tokenize("1 `plus` 2").into_iter().map(move |t| t.kind).collect(); let token_kinds: Vec<TokenKind> = tokenize("1 `plus` 2").into_iter().map(move |t| t.kind).collect();
assert_eq!(token_kinds, vec![digit!("1"), op!("plus"), digit!("2")]); assert_eq!(token_kinds, vec![digit!("1"), op!("plus"), digit!("2")]);
} }
// Verifies that both an unprefixed string literal and one written with a
// leading `b` prefix tokenize to a single `StrLiteral`, with the prefix
// recorded as `None` and `Some("b")` respectively.
#[test]
fn string_literals() {
    // Tokenize `src` and keep only the kind of each resulting token.
    let kinds_of = |src: &str| -> Vec<TokenKind> {
        tokenize(src).into_iter().map(|tok| tok.kind).collect()
    };

    // Plain literal: contents are captured and no prefix is attached.
    let plain = StrLiteral {
        s: Rc::new("some string".to_string()),
        prefix: None,
    };
    assert_eq!(kinds_of(r#""some string""#), vec![plain]);

    // Prefixed literal: the `b` directly before the opening quote is kept
    // as the literal's prefix rather than emitted as a separate token.
    let prefixed = StrLiteral {
        s: Rc::new("some bytestring".to_string()),
        prefix: Some(Rc::new("b".to_string())),
    };
    assert_eq!(kinds_of(r#"b"some bytestring""#), vec![prefixed]);
}
} }