Ignore leading byte order mark in source files (#1021)

This commit is contained in:
Casey Rodarmor 2021-11-04 21:35:57 -07:00 committed by GitHub
parent f3abb95c78
commit 8b49c0cbd1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 66 additions and 2 deletions

View File

@ -490,6 +490,7 @@ impl<'src> Lexer<'src> {
'@' => self.lex_single(At),
'[' => self.lex_delimiter(BracketL),
'\n' | '\r' => self.lex_eol(),
'\u{feff}' => self.lex_single(ByteOrderMark),
']' => self.lex_delimiter(BracketR),
'`' | '"' | '\'' => self.lex_string(),
'{' => self.lex_delimiter(BraceL),
@ -926,6 +927,7 @@ mod tests {
BraceR => "}",
BracketL => "[",
BracketR => "]",
ByteOrderMark => "\u{feff}",
Colon => ":",
ColonEquals => ":=",
Comma => ",",

View File

@ -57,7 +57,12 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
/// `Parser::next`
fn unexpected_token(&self) -> CompileResult<'src, CompileError<'src>> {
// Builds an `UnexpectedToken` error through `self.error`: `expected` is
// collected from `self.expected`, `found` is the kind of the token returned
// by `self.next()`.
self.error(CompileErrorKind::UnexpectedToken {
// NOTE(review): the diff markers are not visible in this rendering. The hunk
// header (7 lines before, 12 after) suggests this one-line `expected:` is the
// removed side and the chain below is its replacement — confirm.
expected: self.expected.iter().cloned().collect::<Vec<TokenKind>>(),
// Same collection as above, except that `ByteOrderMark` is filtered out of
// the expected kinds placed in the error.
expected: self
.expected
.iter()
.cloned()
.filter(|kind| *kind != ByteOrderMark)
.collect::<Vec<TokenKind>>(),
// `self.next()?` returns early from this function if fetching the token fails.
found: self.next()?.kind,
})
}
@ -302,6 +307,8 @@ impl<'tokens, 'src> Parser<'tokens, 'src> {
let mut eol_since_last_comment = false;
self.accept(ByteOrderMark)?;
loop {
let next = self.next()?;

View File

@ -11,6 +11,7 @@ pub(crate) enum TokenKind {
BraceR,
BracketL,
BracketR,
ByteOrderMark,
Colon,
ColonEquals,
Comma,
@ -51,6 +52,7 @@ impl Display for TokenKind {
BraceR => "'}'",
BracketL => "'['",
BracketR => "']'",
ByteOrderMark => "byte order mark",
Colon => "':'",
ColonEquals => "':='",
Comma => "','",
@ -61,6 +63,7 @@ impl Display for TokenKind {
Eol => "end of line",
Equals => "'='",
EqualsEquals => "'=='",
EqualsTilde => "'=~'",
Identifier => "identifier",
Indent => "indent",
InterpolationEnd => "'}}'",
@ -70,7 +73,6 @@ impl Display for TokenKind {
Plus => "'+'",
StringToken => "string",
Text => "command text",
EqualsTilde => "'=~'",
Unspecified => "unspecified",
Whitespace => "whitespace",
}

52
tests/byte_order_mark.rs Normal file
View File

@ -0,0 +1,52 @@
use crate::common::*;
/// A justfile whose first recipe is directly preceded by a U+FEFF byte order
/// mark is expected to behave like an ordinary justfile: the `foo` recipe's
/// command is expected on stderr and its output on stdout.
// NOTE(review): the fixture string opens with a newline before the byte order
// mark; presumably the `Test` helper strips that leading newline and common
// indentation so the mark ends up first in the file — confirm in `common`.
#[test]
fn ignore_leading_byte_order_mark() {
Test::new()
.justfile(
"
\u{feff}foo:
echo bar
",
)
// Expected stderr: the recipe line `echo bar`.
.stderr("echo bar\n")
// Expected stdout: the output of that command.
.stdout("bar\n")
// No `.status(...)` is set here; presumably `Test` then expects a successful exit — confirm.
.run();
}
/// A U+FEFF byte order mark that is not at the start of the justfile (here it
/// sits on its own line after the `foo` recipe) is expected to be reported as
/// an unexpected token named "byte order mark", with a failing exit status.
#[test]
fn non_leading_byte_order_mark_produces_error() {
Test::new()
.justfile(
"
foo:
echo bar
\u{feff}
",
)
// The `\'` escapes in the message below denote plain `'` characters. The
// expected message points at line 3 of the justfile, where the mark is.
.stderr(
"
error: Expected \'@\', comment, end of file, end of line, or identifier, but found byte order mark
|
3 | \u{feff}
| ^
")
.status(EXIT_FAILURE)
.run();
}
/// For a justfile consisting only of `{`, the expected error message lists
/// '@', comment, end of file, end of line, and identifier as the expected
/// tokens — "byte order mark" is not among them — and the exit status is a
/// failure.
#[test]
fn dont_mention_byte_order_mark_in_errors() {
Test::new()
.justfile("{")
// The expected-token list in this message contains no "byte order mark" entry.
.stderr(
"
error: Expected '@', comment, end of file, end of line, or identifier, but found '{'
|
1 | {
| ^
",
)
.status(EXIT_FAILURE)
.run();
}

View File

@ -3,6 +3,7 @@ mod test;
mod assert_stdout;
mod assert_success;
mod byte_order_mark;
mod changelog;
mod choose;
mod command;