Automatically track expected tokens while parsing (#711)

Remove all manual tracking of which tokens would have been accepted by
the parser in favor of having the parser add tokens that it checks for
to a set of expected tokens, clearing them when it accepts a token, and
using the current contents of the set in error messages.

This is a massive improvement, and will make the parser easier to
modify going forward.

And, this actually solves my sole issue with hand-written parsers.

Thanks to matklad on reddit for suggesting this!
This commit is contained in:
Casey Rodarmor 2020-10-25 19:37:26 -07:00 committed by GitHub
parent d7799ebec4
commit bdf1c92251
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 98 additions and 143 deletions

View File

@ -34,9 +34,8 @@ pub(crate) use crate::{default::default, empty::empty, load_dotenv::load_dotenv,
// traits // traits
pub(crate) use crate::{ pub(crate) use crate::{
command_ext::CommandExt, compilation_result_ext::CompilationResultExt, error::Error, command_ext::CommandExt, error::Error, error_result_ext::ErrorResultExt, keyed::Keyed,
error_result_ext::ErrorResultExt, keyed::Keyed, ordinal::Ordinal, ordinal::Ordinal, platform_interface::PlatformInterface, range_ext::RangeExt,
platform_interface::PlatformInterface, range_ext::RangeExt,
}; };
// structs and enums // structs and enums

View File

@ -1,23 +0,0 @@
use crate::common::*;
pub(crate) trait CompilationResultExt {
fn expected(self, kinds: &[TokenKind]) -> Self;
}
impl<'src, T> CompilationResultExt for CompilationResult<'src, T> {
fn expected(mut self, kinds: &[TokenKind]) -> Self {
if let Err(CompilationError {
kind: CompilationErrorKind::UnexpectedToken {
ref mut expected, ..
},
..
}) = &mut self
{
expected.extend_from_slice(kinds);
expected.sort();
expected.dedup();
}
self
}
}

View File

@ -60,7 +60,6 @@ mod command_ext;
mod common; mod common;
mod compilation_error; mod compilation_error;
mod compilation_error_kind; mod compilation_error_kind;
mod compilation_result_ext;
mod compiler; mod compiler;
mod config; mod config;
mod config_error; mod config_error;

View File

@ -18,11 +18,20 @@ use TokenKind::*;
/// and not a syntax error. /// and not a syntax error.
/// ///
/// All methods starting with `parse_*` parse and return a language construct. /// All methods starting with `parse_*` parse and return a language construct.
///
/// The parser tracks an expected set of tokens as it parses. This set contains
/// all tokens which would have been accepted at the current point in the parse.
/// Whenever the parser tests for a token that would be accepted, but does not
/// find it, it adds that token to the set. When the parser accepts a token, the
/// set is cleared. If the parser finds a token which is unexpected, the
/// contents of the set is printed in the resultant error message.
pub(crate) struct Parser<'tokens, 'src> { pub(crate) struct Parser<'tokens, 'src> {
/// Source tokens /// Source tokens
tokens: &'tokens [Token<'src>], tokens: &'tokens [Token<'src>],
/// Index of the next un-parsed token /// Index of the next un-parsed token
next: usize, next: usize,
/// Current expected tokens
expected: BTreeSet<TokenKind>,
} }
impl<'tokens, 'src> Parser<'tokens, 'src> { impl<'tokens, 'src> Parser<'tokens, 'src> {
@ -33,7 +42,11 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Construct a new Paser from a token stream /// Construct a new Paser from a token stream
fn new(tokens: &'tokens [Token<'src>]) -> Parser<'tokens, 'src> { fn new(tokens: &'tokens [Token<'src>]) -> Parser<'tokens, 'src> {
Parser { next: 0, tokens } Parser {
next: 0,
expected: BTreeSet::new(),
tokens,
}
} }
fn error( fn error(
@ -45,16 +58,10 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Construct an unexpected token error with the token returned by /// Construct an unexpected token error with the token returned by
/// `Parser::next` /// `Parser::next`
fn unexpected_token( fn unexpected_token(&self) -> CompilationResult<'src, CompilationError<'src>> {
&self,
expected: &[TokenKind],
) -> CompilationResult<'src, CompilationError<'src>> {
let mut expected = expected.to_vec();
expected.sort();
self.error(CompilationErrorKind::UnexpectedToken { self.error(CompilationErrorKind::UnexpectedToken {
expected, expected: self.expected.iter().cloned().collect::<Vec<TokenKind>>(),
found: self.next()?.kind, found: self.next()?.kind,
}) })
} }
@ -85,12 +92,18 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
} }
/// Check if the next significant token is of kind `kind` /// Check if the next significant token is of kind `kind`
fn next_is(&self, kind: TokenKind) -> bool { fn next_is(&mut self, kind: TokenKind) -> bool {
self.next_are(&[kind]) self.next_are(&[kind])
} }
/// Check if the next significant tokens are of kinds `kinds` /// Check if the next significant tokens are of kinds `kinds`
fn next_are(&self, kinds: &[TokenKind]) -> bool { ///
/// The first token in `kinds` will be added to the expected token set.
fn next_are(&mut self, kinds: &[TokenKind]) -> bool {
if let Some(kind) = kinds.first() {
self.expected.insert(*kind);
}
let mut rest = self.rest(); let mut rest = self.rest();
for kind in kinds { for kind in kinds {
match rest.next() { match rest.next() {
@ -112,8 +125,10 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
} }
} }
/// Advance past one significant token /// Advance past one significant token, clearing the expected token set.
fn advance(&mut self) -> CompilationResult<'src, Token<'src>> { fn advance(&mut self) -> CompilationResult<'src, Token<'src>> {
self.expected.clear();
for skipped in &self.tokens[self.next..] { for skipped in &self.tokens[self.next..] {
self.next += 1; self.next += 1;
@ -131,7 +146,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
if let Some(token) = self.accept(expected)? { if let Some(token) = self.accept(expected)? {
Ok(token) Ok(token)
} else { } else {
Err(self.unexpected_token(&[expected])?) Err(self.unexpected_token()?)
} }
} }
@ -143,7 +158,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
} }
} }
Err(self.unexpected_token(expected)?) Err(self.unexpected_token()?)
} }
/// Return an unexpected token error if the next token is not an EOL /// Return an unexpected token error if the next token is not an EOL
@ -154,7 +169,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
return Ok(()); return Ok(());
} }
self.expect(Eol).map(|_| ()).expected(&[Eof]) self.expect(Eol).map(|_| ())
} }
/// Return an internal error if the next token is not of kind `Identifier` /// Return an internal error if the next token is not of kind `Identifier`
@ -208,10 +223,8 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Accept and return a token of kind `kind` /// Accept and return a token of kind `kind`
fn accept(&mut self, kind: TokenKind) -> CompilationResult<'src, Option<Token<'src>>> { fn accept(&mut self, kind: TokenKind) -> CompilationResult<'src, Option<Token<'src>>> {
let next = self.next()?; if self.next_is(kind) {
if next.kind == kind { Ok(Some(self.advance()?))
self.advance()?;
Ok(Some(next))
} else { } else {
Ok(None) Ok(None)
} }
@ -263,19 +276,14 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
loop { loop {
let next = self.next()?; let next = self.next()?;
match next.kind { if let Some(comment) = self.accept(Comment)? {
Comment => { doc = Some(comment.lexeme()[1..].trim());
doc = Some(next.lexeme()[1..].trim()); self.expect_eol()?;
self.expect_eol()?; } else if self.accepted(Eol)? {
}, } else if self.accepted(Eof)? {
Eol => { break;
self.advance()?; } else if self.next_is(Identifier) {
}, match next.lexeme() {
Eof => {
self.advance()?;
break;
},
Identifier => match next.lexeme() {
keyword::ALIAS => keyword::ALIAS =>
if self.next_are(&[Identifier, Identifier, Equals]) { if self.next_are(&[Identifier, Identifier, Equals]) {
warnings.push(Warning::DeprecatedEquals { warnings.push(Warning::DeprecatedEquals {
@ -317,14 +325,11 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
} else { } else {
items.push(Item::Recipe(self.parse_recipe(doc, false)?)); items.push(Item::Recipe(self.parse_recipe(doc, false)?));
}, },
}, }
At => { } else if self.accepted(At)? {
self.presume(At)?; items.push(Item::Recipe(self.parse_recipe(doc, true)?));
items.push(Item::Recipe(self.parse_recipe(doc, true)?)); } else {
}, return Err(self.unexpected_token()?);
_ => {
return Err(self.unexpected_token(&[Identifier, At])?);
},
} }
if next.kind != Comment { if next.kind != Comment {
@ -380,36 +385,34 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Parse a value, e.g. `(bar)` /// Parse a value, e.g. `(bar)`
fn parse_value(&mut self) -> CompilationResult<'src, Expression<'src>> { fn parse_value(&mut self) -> CompilationResult<'src, Expression<'src>> {
let next = self.next()?; if self.next_is(StringCooked) || self.next_is(StringRaw) {
Ok(Expression::StringLiteral {
match next.kind {
StringCooked | StringRaw => Ok(Expression::StringLiteral {
string_literal: self.parse_string_literal()?, string_literal: self.parse_string_literal()?,
}), })
Backtick => { } else if self.next_is(Backtick) {
let contents = &next.lexeme()[1..next.lexeme().len() - 1]; let next = self.next()?;
let token = self.advance()?;
Ok(Expression::Backtick { contents, token })
},
Identifier => {
let name = self.parse_name()?;
if self.next_is(ParenL) { let contents = &next.lexeme()[1..next.lexeme().len() - 1];
let arguments = self.parse_sequence()?; let token = self.advance()?;
Ok(Expression::Call { Ok(Expression::Backtick { contents, token })
thunk: Thunk::resolve(name, arguments)?, } else if self.next_is(Identifier) {
}) let name = self.parse_name()?;
} else {
Ok(Expression::Variable { name }) if self.next_is(ParenL) {
} let arguments = self.parse_sequence()?;
}, Ok(Expression::Call {
ParenL => { thunk: Thunk::resolve(name, arguments)?,
self.presume(ParenL)?; })
let contents = Box::new(self.parse_expression()?); } else {
self.expect(ParenR)?; Ok(Expression::Variable { name })
Ok(Expression::Group { contents }) }
}, } else if self.next_is(ParenL) {
_ => Err(self.unexpected_token(&[StringCooked, StringRaw, Backtick, Identifier, ParenL])?), self.presume(ParenL)?;
let contents = Box::new(self.parse_expression()?);
self.expect(ParenR)?;
Ok(Expression::Group { contents })
} else {
Err(self.unexpected_token()?)
} }
} }
@ -471,7 +474,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
let mut elements = Vec::new(); let mut elements = Vec::new();
while !self.next_is(ParenR) { while !self.next_is(ParenR) {
elements.push(self.parse_expression().expected(&[ParenR])?); elements.push(self.parse_expression()?);
if !self.accepted(Comma)? { if !self.accepted(Comma)? {
break; break;
@ -508,10 +511,12 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
let variadic = if kind.is_variadic() { let variadic = if kind.is_variadic() {
let variadic = self.parse_parameter(kind)?; let variadic = self.parse_parameter(kind)?;
if let Some(identifier) = self.accept(Identifier)? { let next = self.next()?;
if next.kind == Identifier {
return Err( return Err(
identifier.error(CompilationErrorKind::ParameterFollowsVariadicParameter { next.error(CompilationErrorKind::ParameterFollowsVariadicParameter {
parameter: identifier.lexeme(), parameter: next.lexeme(),
}), }),
); );
} }
@ -521,29 +526,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
None None
}; };
let result = self.expect(Colon); self.expect(Colon)?;
if result.is_err() {
let mut alternatives = Vec::new();
if variadic.is_none() {
alternatives.push(Identifier);
}
if !quiet && variadic.is_none() && positional.is_empty() {
alternatives.push(ColonEquals);
}
if variadic.is_some() || !positional.is_empty() {
alternatives.push(Equals);
}
if variadic.is_none() {
alternatives.push(Plus);
}
result.expected(&alternatives)?;
}
let mut dependencies = Vec::new(); let mut dependencies = Vec::new();
@ -551,7 +534,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
dependencies.push(dependency); dependencies.push(dependency);
} }
self.expect_eol().expected(&[Identifier])?; self.expect_eol()?;
let body = self.parse_body()?; let body = self.parse_body()?;
@ -606,7 +589,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
}); });
self.expect(InterpolationEnd)?; self.expect(InterpolationEnd)?;
} else { } else {
return Err(self.unexpected_token(&[Text, InterpolationStart])?); return Err(self.unexpected_token()?);
} }
} }
@ -637,24 +620,17 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
let mut arguments = Vec::new(); let mut arguments = Vec::new();
let mut comma = false;
if self.accepted(Comma)? { if self.accepted(Comma)? {
comma = true;
while !self.next_is(BracketR) { while !self.next_is(BracketR) {
arguments.push(self.parse_string_literal().expected(&[BracketR])?); arguments.push(self.parse_string_literal()?);
if !self.accepted(Comma)? { if !self.accepted(Comma)? {
comma = false;
break; break;
} }
comma = true;
} }
} }
self self.expect(BracketR)?;
.expect(BracketR)
.expected(if comma { &[] } else { &[Comma] })?;
Ok(Set { Ok(Set {
value: Setting::Shell(setting::Shell { command, arguments }), value: Setting::Shell(setting::Shell { command, arguments }),
@ -1530,7 +1506,7 @@ mod tests {
line: 0, line: 0,
column: 16, column: 16,
width: 3, width: 3,
kind: UnexpectedToken { expected: vec![Eof, Eol], found: Identifier }, kind: UnexpectedToken { expected: vec![Comment, Eof, Eol], found: Identifier },
} }
error! { error! {
@ -1550,7 +1526,7 @@ mod tests {
line: 0, line: 0,
column: 5, column: 5,
width: 1, width: 1,
kind: UnexpectedToken{expected: vec![Colon, Equals, Identifier, Plus], found: Eol}, kind: UnexpectedToken{expected: vec![Asterisk, Colon, Equals, Identifier, Plus], found: Eol},
} }
error! { error! {
@ -1586,7 +1562,7 @@ mod tests {
line: 0, line: 0,
column: 9, column: 9,
width: 1, width: 1,
kind: UnexpectedToken{expected: vec![Eof, Eol, Identifier], found: Equals}, kind: UnexpectedToken{expected: vec![Comment, Eof, Eol, Identifier, ParenL], found: Equals},
} }
error! { error! {
@ -1596,7 +1572,10 @@ mod tests {
line: 0, line: 0,
column: 0, column: 0,
width: 2, width: 2,
kind: UnexpectedToken{expected: vec![At, Identifier], found: InterpolationStart}, kind: UnexpectedToken {
expected: vec![At, Comment, Eof, Eol, Identifier],
found: InterpolationStart,
},
} }
error! { error! {
@ -1652,7 +1631,7 @@ mod tests {
line: 0, line: 0,
column: 8, column: 8,
width: 0, width: 0,
kind: UnexpectedToken{expected: vec![Colon, Equals, Identifier, Plus], found: Eof}, kind: UnexpectedToken{expected: vec![Asterisk, Colon, Equals, Identifier, Plus], found: Eof},
} }
error! { error! {

View File

@ -1490,7 +1490,8 @@ test! {
justfile: "foo: 'bar'", justfile: "foo: 'bar'",
args: ("foo"), args: ("foo"),
stdout: "", stdout: "",
stderr: "error: Expected end of file, end of line, or identifier, but found raw string stderr: "error: Expected comment, end of file, end of line, \
identifier, or '(', but found raw string
| |
1 | foo: 'bar' 1 | foo: 'bar'
| ^^^^^ | ^^^^^
@ -1503,7 +1504,7 @@ test! {
justfile: "foo 'bar'", justfile: "foo 'bar'",
args: ("foo"), args: ("foo"),
stdout: "", stdout: "",
stderr: "error: Expected ':', ':=', identifier, or '+', but found raw string stderr: "error: Expected '*', ':', identifier, or '+', but found raw string
| |
1 | foo 'bar' 1 | foo 'bar'
| ^^^^^ | ^^^^^