Automatically track expected tokens while parsing (#711)

Remove all manual tracking of which tokens would have been accepted by
the parser in favor of having the parser add tokens that it checks for
to a set of expected tokens, clearing them when it accepts a token, and
using the current contents of the set in error messages.

This is a massive improvement, and will make the parser easier to
modify going forward.

And, this actually solves my sole issue with hand-written parsers.

Thanks to matklad on reddit for suggesting this!
This commit is contained in:
Casey Rodarmor 2020-10-25 19:37:26 -07:00 committed by GitHub
parent d7799ebec4
commit bdf1c92251
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 98 additions and 143 deletions

View File

@ -34,9 +34,8 @@ pub(crate) use crate::{default::default, empty::empty, load_dotenv::load_dotenv,
// traits
pub(crate) use crate::{
command_ext::CommandExt, compilation_result_ext::CompilationResultExt, error::Error,
error_result_ext::ErrorResultExt, keyed::Keyed, ordinal::Ordinal,
platform_interface::PlatformInterface, range_ext::RangeExt,
command_ext::CommandExt, error::Error, error_result_ext::ErrorResultExt, keyed::Keyed,
ordinal::Ordinal, platform_interface::PlatformInterface, range_ext::RangeExt,
};
// structs and enums

View File

@ -1,23 +0,0 @@
use crate::common::*;
pub(crate) trait CompilationResultExt {
fn expected(self, kinds: &[TokenKind]) -> Self;
}
impl<'src, T> CompilationResultExt for CompilationResult<'src, T> {
fn expected(mut self, kinds: &[TokenKind]) -> Self {
if let Err(CompilationError {
kind: CompilationErrorKind::UnexpectedToken {
ref mut expected, ..
},
..
}) = &mut self
{
expected.extend_from_slice(kinds);
expected.sort();
expected.dedup();
}
self
}
}

View File

@ -60,7 +60,6 @@ mod command_ext;
mod common;
mod compilation_error;
mod compilation_error_kind;
mod compilation_result_ext;
mod compiler;
mod config;
mod config_error;

View File

@ -18,11 +18,20 @@ use TokenKind::*;
/// and not a syntax error.
///
/// All methods starting with `parse_*` parse and return a language construct.
///
/// The parser tracks an expected set of tokens as it parses. This set contains
/// all tokens which would have been accepted at the current point in the parse.
/// Whenever the parser tests for a token that would be accepted, but does not
/// find it, it adds that token to the set. When the parser accepts a token, the
/// set is cleared. If the parser finds a token which is unexpected, the
/// contents of the set is printed in the resultant error message.
pub(crate) struct Parser<'tokens, 'src> {
/// Source tokens
tokens: &'tokens [Token<'src>],
tokens: &'tokens [Token<'src>],
/// Index of the next un-parsed token
next: usize,
next: usize,
/// Current expected tokens
expected: BTreeSet<TokenKind>,
}
impl<'tokens, 'src> Parser<'tokens, 'src> {
@ -33,7 +42,11 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Construct a new Paser from a token stream
fn new(tokens: &'tokens [Token<'src>]) -> Parser<'tokens, 'src> {
Parser { next: 0, tokens }
Parser {
next: 0,
expected: BTreeSet::new(),
tokens,
}
}
fn error(
@ -45,16 +58,10 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Construct an unexpected token error with the token returned by
/// `Parser::next`
fn unexpected_token(
&self,
expected: &[TokenKind],
) -> CompilationResult<'src, CompilationError<'src>> {
let mut expected = expected.to_vec();
expected.sort();
fn unexpected_token(&self) -> CompilationResult<'src, CompilationError<'src>> {
self.error(CompilationErrorKind::UnexpectedToken {
expected,
found: self.next()?.kind,
expected: self.expected.iter().cloned().collect::<Vec<TokenKind>>(),
found: self.next()?.kind,
})
}
@ -85,12 +92,18 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
}
/// Check if the next significant token is of kind `kind`
fn next_is(&self, kind: TokenKind) -> bool {
fn next_is(&mut self, kind: TokenKind) -> bool {
self.next_are(&[kind])
}
/// Check if the next significant tokens are of kinds `kinds`
fn next_are(&self, kinds: &[TokenKind]) -> bool {
///
/// The first token in `kinds` will be added to the expected token set.
fn next_are(&mut self, kinds: &[TokenKind]) -> bool {
if let Some(kind) = kinds.first() {
self.expected.insert(*kind);
}
let mut rest = self.rest();
for kind in kinds {
match rest.next() {
@ -112,8 +125,10 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
}
}
/// Advance past one significant token
/// Advance past one significant token, clearing the expected token set.
fn advance(&mut self) -> CompilationResult<'src, Token<'src>> {
self.expected.clear();
for skipped in &self.tokens[self.next..] {
self.next += 1;
@ -131,7 +146,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
if let Some(token) = self.accept(expected)? {
Ok(token)
} else {
Err(self.unexpected_token(&[expected])?)
Err(self.unexpected_token()?)
}
}
@ -143,7 +158,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
}
}
Err(self.unexpected_token(expected)?)
Err(self.unexpected_token()?)
}
/// Return an unexpected token error if the next token is not an EOL
@ -154,7 +169,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
return Ok(());
}
self.expect(Eol).map(|_| ()).expected(&[Eof])
self.expect(Eol).map(|_| ())
}
/// Return an internal error if the next token is not of kind `Identifier`
@ -208,10 +223,8 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Accept and return a token of kind `kind`
fn accept(&mut self, kind: TokenKind) -> CompilationResult<'src, Option<Token<'src>>> {
let next = self.next()?;
if next.kind == kind {
self.advance()?;
Ok(Some(next))
if self.next_is(kind) {
Ok(Some(self.advance()?))
} else {
Ok(None)
}
@ -263,19 +276,14 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
loop {
let next = self.next()?;
match next.kind {
Comment => {
doc = Some(next.lexeme()[1..].trim());
self.expect_eol()?;
},
Eol => {
self.advance()?;
},
Eof => {
self.advance()?;
break;
},
Identifier => match next.lexeme() {
if let Some(comment) = self.accept(Comment)? {
doc = Some(comment.lexeme()[1..].trim());
self.expect_eol()?;
} else if self.accepted(Eol)? {
} else if self.accepted(Eof)? {
break;
} else if self.next_is(Identifier) {
match next.lexeme() {
keyword::ALIAS =>
if self.next_are(&[Identifier, Identifier, Equals]) {
warnings.push(Warning::DeprecatedEquals {
@ -317,14 +325,11 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
} else {
items.push(Item::Recipe(self.parse_recipe(doc, false)?));
},
},
At => {
self.presume(At)?;
items.push(Item::Recipe(self.parse_recipe(doc, true)?));
},
_ => {
return Err(self.unexpected_token(&[Identifier, At])?);
},
}
} else if self.accepted(At)? {
items.push(Item::Recipe(self.parse_recipe(doc, true)?));
} else {
return Err(self.unexpected_token()?);
}
if next.kind != Comment {
@ -380,36 +385,34 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// Parse a value, e.g. `(bar)`
fn parse_value(&mut self) -> CompilationResult<'src, Expression<'src>> {
let next = self.next()?;
match next.kind {
StringCooked | StringRaw => Ok(Expression::StringLiteral {
if self.next_is(StringCooked) || self.next_is(StringRaw) {
Ok(Expression::StringLiteral {
string_literal: self.parse_string_literal()?,
}),
Backtick => {
let contents = &next.lexeme()[1..next.lexeme().len() - 1];
let token = self.advance()?;
Ok(Expression::Backtick { contents, token })
},
Identifier => {
let name = self.parse_name()?;
})
} else if self.next_is(Backtick) {
let next = self.next()?;
if self.next_is(ParenL) {
let arguments = self.parse_sequence()?;
Ok(Expression::Call {
thunk: Thunk::resolve(name, arguments)?,
})
} else {
Ok(Expression::Variable { name })
}
},
ParenL => {
self.presume(ParenL)?;
let contents = Box::new(self.parse_expression()?);
self.expect(ParenR)?;
Ok(Expression::Group { contents })
},
_ => Err(self.unexpected_token(&[StringCooked, StringRaw, Backtick, Identifier, ParenL])?),
let contents = &next.lexeme()[1..next.lexeme().len() - 1];
let token = self.advance()?;
Ok(Expression::Backtick { contents, token })
} else if self.next_is(Identifier) {
let name = self.parse_name()?;
if self.next_is(ParenL) {
let arguments = self.parse_sequence()?;
Ok(Expression::Call {
thunk: Thunk::resolve(name, arguments)?,
})
} else {
Ok(Expression::Variable { name })
}
} else if self.next_is(ParenL) {
self.presume(ParenL)?;
let contents = Box::new(self.parse_expression()?);
self.expect(ParenR)?;
Ok(Expression::Group { contents })
} else {
Err(self.unexpected_token()?)
}
}
@ -471,7 +474,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
let mut elements = Vec::new();
while !self.next_is(ParenR) {
elements.push(self.parse_expression().expected(&[ParenR])?);
elements.push(self.parse_expression()?);
if !self.accepted(Comma)? {
break;
@ -508,10 +511,12 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
let variadic = if kind.is_variadic() {
let variadic = self.parse_parameter(kind)?;
if let Some(identifier) = self.accept(Identifier)? {
let next = self.next()?;
if next.kind == Identifier {
return Err(
identifier.error(CompilationErrorKind::ParameterFollowsVariadicParameter {
parameter: identifier.lexeme(),
next.error(CompilationErrorKind::ParameterFollowsVariadicParameter {
parameter: next.lexeme(),
}),
);
}
@ -521,29 +526,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
None
};
let result = self.expect(Colon);
if result.is_err() {
let mut alternatives = Vec::new();
if variadic.is_none() {
alternatives.push(Identifier);
}
if !quiet && variadic.is_none() && positional.is_empty() {
alternatives.push(ColonEquals);
}
if variadic.is_some() || !positional.is_empty() {
alternatives.push(Equals);
}
if variadic.is_none() {
alternatives.push(Plus);
}
result.expected(&alternatives)?;
}
self.expect(Colon)?;
let mut dependencies = Vec::new();
@ -551,7 +534,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
dependencies.push(dependency);
}
self.expect_eol().expected(&[Identifier])?;
self.expect_eol()?;
let body = self.parse_body()?;
@ -606,7 +589,7 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
});
self.expect(InterpolationEnd)?;
} else {
return Err(self.unexpected_token(&[Text, InterpolationStart])?);
return Err(self.unexpected_token()?);
}
}
@ -637,24 +620,17 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
let mut arguments = Vec::new();
let mut comma = false;
if self.accepted(Comma)? {
comma = true;
while !self.next_is(BracketR) {
arguments.push(self.parse_string_literal().expected(&[BracketR])?);
arguments.push(self.parse_string_literal()?);
if !self.accepted(Comma)? {
comma = false;
break;
}
comma = true;
}
}
self
.expect(BracketR)
.expected(if comma { &[] } else { &[Comma] })?;
self.expect(BracketR)?;
Ok(Set {
value: Setting::Shell(setting::Shell { command, arguments }),
@ -1530,7 +1506,7 @@ mod tests {
line: 0,
column: 16,
width: 3,
kind: UnexpectedToken { expected: vec![Eof, Eol], found: Identifier },
kind: UnexpectedToken { expected: vec![Comment, Eof, Eol], found: Identifier },
}
error! {
@ -1550,7 +1526,7 @@ mod tests {
line: 0,
column: 5,
width: 1,
kind: UnexpectedToken{expected: vec![Colon, Equals, Identifier, Plus], found: Eol},
kind: UnexpectedToken{expected: vec![Asterisk, Colon, Equals, Identifier, Plus], found: Eol},
}
error! {
@ -1586,7 +1562,7 @@ mod tests {
line: 0,
column: 9,
width: 1,
kind: UnexpectedToken{expected: vec![Eof, Eol, Identifier], found: Equals},
kind: UnexpectedToken{expected: vec![Comment, Eof, Eol, Identifier, ParenL], found: Equals},
}
error! {
@ -1596,7 +1572,10 @@ mod tests {
line: 0,
column: 0,
width: 2,
kind: UnexpectedToken{expected: vec![At, Identifier], found: InterpolationStart},
kind: UnexpectedToken {
expected: vec![At, Comment, Eof, Eol, Identifier],
found: InterpolationStart,
},
}
error! {
@ -1652,7 +1631,7 @@ mod tests {
line: 0,
column: 8,
width: 0,
kind: UnexpectedToken{expected: vec![Colon, Equals, Identifier, Plus], found: Eof},
kind: UnexpectedToken{expected: vec![Asterisk, Colon, Equals, Identifier, Plus], found: Eof},
}
error! {

View File

@ -1490,7 +1490,8 @@ test! {
justfile: "foo: 'bar'",
args: ("foo"),
stdout: "",
stderr: "error: Expected end of file, end of line, or identifier, but found raw string
stderr: "error: Expected comment, end of file, end of line, \
identifier, or '(', but found raw string
|
1 | foo: 'bar'
| ^^^^^
@ -1503,7 +1504,7 @@ test! {
justfile: "foo 'bar'",
args: ("foo"),
stdout: "",
stderr: "error: Expected ':', ':=', identifier, or '+', but found raw string
stderr: "error: Expected '*', ':', identifier, or '+', but found raw string
|
1 | foo 'bar'
| ^^^^^