From 596ea3446058ec05a2b43d84c90d05f86555f8ae Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Mon, 15 Apr 2019 22:40:02 -0700 Subject: [PATCH] Replace regex-based lexer with character-at-a-time lexer (#406) --- Cargo.lock | 1 - Cargo.toml | 1 - README.adoc | 80 +- functions.rs | 30 + justfile | 2 +- src/assignment_evaluator.rs | 8 +- src/assignment_resolver.rs | 20 +- src/color.rs | 30 +- src/common.rs | 19 +- src/compilation_error.rs | 21 +- src/expression.rs | 62 +- src/fragment.rs | 2 +- src/function.rs | 9 +- src/function_context.rs | 6 + src/functions.rs | 36 + src/interrupt_guard.rs | 16 + src/interrupt_handler.rs | 23 +- src/lexer.rs | 1251 ++++++++++++------- src/lib.rs | 11 +- src/misc.rs | 43 +- src/parser.rs | 221 ++-- src/position.rs | 7 + src/recipe.rs | 16 +- src/recipe_context.rs | 7 + src/recipe_resolver.rs | 46 +- src/run.rs | 34 +- src/runtime_error.rs | 13 +- src/state.rs | 9 + src/{cooked_string.rs => string_literal.rs} | 18 +- src/summary.rs | 2 +- src/testing.rs | 55 +- src/token.rs | 71 +- src/token_kind.rs | 59 + src/use_color.rs | 6 + src/variables.rs | 34 + tests/integration.rs | 91 +- tests/interrupts.rs | 2 + variables.rs | 28 + 38 files changed, 1521 insertions(+), 869 deletions(-) create mode 100644 functions.rs create mode 100644 src/function_context.rs create mode 100644 src/functions.rs create mode 100644 src/interrupt_guard.rs create mode 100644 src/position.rs create mode 100644 src/recipe_context.rs create mode 100644 src/state.rs rename src/{cooked_string.rs => string_literal.rs} (76%) create mode 100644 src/token_kind.rs create mode 100644 src/use_color.rs create mode 100644 src/variables.rs create mode 100644 variables.rs diff --git a/Cargo.lock b/Cargo.lock index 3f05062..b18f411 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -206,7 +206,6 @@ dependencies = [ "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "target 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index bcf21dd..aea58b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ itertools = "0.8.0" lazy_static = "1.0.0" libc = "0.2.21" log = "0.4.4" -regex = "1.0.0" target = "1.0.0" tempdir = "0.3.5" unicode-width = "0.1.3" diff --git a/README.adoc b/README.adoc index 3ffb19a..e525de3 100644 --- a/README.adoc +++ b/README.adoc @@ -710,40 +710,6 @@ When a script with a shebang is executed, the system supplies the path to the sc With the above shebang, `just` will change its working directory to the location of the script. If you'd rather leave the working directory unchanged, use `#!/usr/bin/env just --working-directory . --justfile`. -== Frequently Asked Questions - -=== What are the idiosyncrasies of make that just avoids? - -Make has some behaviors which are either confusing, complicated, or make it unsuitable for use as a general command runner. - -One example is that sometimes make won't run the commands in a recipe. For example, if you have a file called `test` and the following makefile that runs it: - -```make -test: - ./test -``` - -Make will actually refuse to run it: - -```sh -$ make test -make: `test' is up to date. -``` - -Make sees the recipe `test` and assumes that it produces a file called `test`. It then sees that this file exists and thus assumes that the recipe doesn't need to be run. - -To be fair, this behavior is desirable when using make as a build system, but not when using it as a command runner. 
- -Some other examples include having to understand the difference between `=` and `:=` assignment, the confusing error messages that can be produced if you mess up your makefile, having to use `$$` to write recipes that use environment variables, and incompatibilites between different flavors of make. - -=== What's the relationship between just and cargo build scripts? - -http://doc.crates.io/build-script.html[Cargo build scripts] have a pretty specific use, which is to control how cargo builds your rust project. This might include adding flags to `rustc` invocations, building an external dependency, or running some kind of codegen step. - -`just`, on the other hand, is for all the other miscellaneous commands you might run as part of development. Things like running tests in different configurations, linting your code, pushing build artifacts to a server, removing temporary files, and the like. - -Also, although `just` is written in rust, it can be used regardless of the language or build system your project uses. - == Miscellanea === Companion Tools @@ -813,7 +779,7 @@ Before `just` was a fancy rust program it was a tiny shell script that called `m === Non-Project Specific Justfile -If you want some commands to be available everwhere, put them in `~/.justfile` and add the following to your shell's initialization file: +If you want some commands to be available everywhere, put them in `~/.justfile` and add the following to your shell's initialization file: ```sh alias .j='just --justfile ~/.justfile --working-directory ~' @@ -829,6 +795,50 @@ I'm pretty sure that nobody actually uses this feature, but it's there. ¯\\_(ツ)_/¯ +== Contributing + +`just` welcomes your contributions! `just` is released under the maximally permissive https://creativecommons.org/publicdomain/zero/1.0/legalcode.txt[CC0] public domain dedication and fallback license, so your changes must also be released under this license. 
+ +=== Janus + +https://github.com/casey/janus[Janus] is a tool that collects and analyzes justfiles, and can determine if a new version of `just` breaks or changes the interpretation of existing justfiles. + +Before merging a particularly large or gruesome change, Janus should be run to make sure that nothing breaks. Don't worry about running Janus yourself, Casey will happily run it for you on changes that need it. + +== Frequently Asked Questions + +=== What are the idiosyncrasies of make that just avoids? + +Make has some behaviors which are either confusing, complicated, or make it unsuitable for use as a general command runner. + +One example is that sometimes make won't run the commands in a recipe. For example, if you have a file called `test` and the following makefile that runs it: + +```make +test: + ./test +``` + +Make will actually refuse to run it: + +```sh +$ make test +make: `test' is up to date. +``` + +Make sees the recipe `test` and assumes that it produces a file called `test`. It then sees that this file exists and thus assumes that the recipe doesn't need to be run. + +To be fair, this behavior is desirable when using make as a build system, but not when using it as a command runner. + +Some other examples include having to understand the difference between `=` and `:=` assignment, the confusing error messages that can be produced if you mess up your makefile, having to use `$$` to write recipes that use environment variables, and incompatibilities between different flavors of make. + +=== What's the relationship between just and cargo build scripts? + +http://doc.crates.io/build-script.html[Cargo build scripts] have a pretty specific use, which is to control how cargo builds your rust project. This might include adding flags to `rustc` invocations, building an external dependency, or running some kind of codegen step. + +`just`, on the other hand, is for all the other miscellaneous commands you might run as part of development. 
Things like running tests in different configurations, linting your code, pushing build artifacts to a server, removing temporary files, and the like. + +Also, although `just` is written in rust, it can be used regardless of the language or build system your project uses. + == Further Ramblings I personally find it very useful to write a `justfile` for almost every project, big or small. diff --git a/functions.rs b/functions.rs new file mode 100644 index 0000000..7caa80e --- /dev/null +++ b/functions.rs @@ -0,0 +1,30 @@ +use crate::common::*; + +pub struct Functions<'a> { + stack: Vec<&'a Expression<'a>>, +} + +impl<'a> Iterator for Functions<'a> { + type Item = (&'a Token<'a>, usize); + + fn next(&mut self) -> Option { + match self.stack.pop() { + None + | Some(Expression::String { .. }) + | Some(Expression::Backtick { .. }) + | Some(Expression::Variable { .. }) => None, + Some(Expression::Call { + token, arguments, .. + }) => Some((token, arguments.len())), + Some(Expression::Concatination { lhs, rhs }) => { + self.stack.push(lhs); + self.stack.push(rhs); + self.next() + } + Some(Expression::Group { expression }) => { + self.stack.push(expression); + self.next() + } + } + } +} diff --git a/justfile b/justfile index 7e9dda1..73bba69 100755 --- a/justfile +++ b/justfile @@ -83,7 +83,7 @@ sloc: ! 
grep --color -En '.{101}' src/*.rs replace FROM TO: - find src -name '*.rs' | xargs sed -i '' -E 's/{{FROM}}/{{TO}}/g' + sd -i '{{FROM}}' '{{TO}}' src/*.rs test-quine: cargo run -- quine diff --git a/src/assignment_evaluator.rs b/src/assignment_evaluator.rs index 3767b3c..5bdb649 100644 --- a/src/assignment_evaluator.rs +++ b/src/assignment_evaluator.rs @@ -1,7 +1,5 @@ use crate::common::*; -use brev; - pub struct AssignmentEvaluator<'a: 'b, 'b> { pub assignments: &'b BTreeMap<&'a str, Expression<'a>>, pub invocation_directory: &'b Result, @@ -53,7 +51,7 @@ impl<'a, 'b> AssignmentEvaluator<'a, 'b> { let mut evaluated = String::new(); for fragment in line { match *fragment { - Fragment::Text { ref text } => evaluated += text.lexeme, + Fragment::Text { ref text } => evaluated += text.lexeme(), Fragment::Expression { ref expression } => { evaluated += &self.evaluate_expression(expression, arguments)?; } @@ -183,7 +181,7 @@ mod test { output_error: OutputError::Code(code), } => { assert_eq!(code, 100); - assert_eq!(token.lexeme, "`f() { return 100; }; f`"); + assert_eq!(token.lexeme(), "`f() { return 100; }; f`"); } other => panic!("expected a code run error, but got: {}", other), } @@ -211,7 +209,7 @@ recipe: token, output_error: OutputError::Code(_), } => { - assert_eq!(token.lexeme, "`echo $exported_variable`"); + assert_eq!(token.lexeme(), "`echo $exported_variable`"); } other => panic!("expected a backtick code errror, but got: {}", other), } diff --git a/src/assignment_resolver.rs b/src/assignment_resolver.rs index 51e91ad..d103fcd 100644 --- a/src/assignment_resolver.rs +++ b/src/assignment_resolver.rs @@ -45,10 +45,10 @@ impl<'a: 'b, 'b> AssignmentResolver<'a, 'b> { let message = format!("attempted to resolve unknown assignment `{}`", name); return Err(CompilationError { text: "", - index: 0, + offset: 0, line: 0, column: 0, - width: None, + width: 0, kind: Internal { message }, }); } @@ -96,40 +96,40 @@ mod test { compilation_error_test! 
{ name: circular_variable_dependency, input: "a = b\nb = a", - index: 0, + offset: 0, line: 0, column: 0, - width: Some(1), + width: 1, kind: CircularVariableDependency{variable: "a", circle: vec!["a", "b", "a"]}, } compilation_error_test! { name: self_variable_dependency, input: "a = a", - index: 0, + offset: 0, line: 0, column: 0, - width: Some(1), + width: 1, kind: CircularVariableDependency{variable: "a", circle: vec!["a", "a"]}, } compilation_error_test! { name: unknown_expression_variable, input: "x = yy", - index: 4, + offset: 4, line: 0, column: 4, - width: Some(2), + width: 2, kind: UndefinedVariable{variable: "yy"}, } compilation_error_test! { name: unknown_function, input: "a = foo()", - index: 4, + offset: 4, line: 0, column: 4, - width: Some(3), + width: 3, kind: UnknownFunction{function: "foo"}, } diff --git a/src/color.rs b/src/color.rs index e59821c..c9bb990 100644 --- a/src/color.rs +++ b/src/color.rs @@ -2,16 +2,8 @@ use crate::common::*; use ansi_term::Color::*; use ansi_term::{ANSIGenericString, Prefix, Style, Suffix}; -use atty::is as is_atty; use atty::Stream; -#[derive(Copy, Clone)] -pub enum UseColor { - Auto, - Always, - Never, -} - #[derive(Copy, Clone)] pub struct Color { use_color: UseColor, @@ -19,16 +11,6 @@ pub struct Color { style: Style, } -impl Default for Color { - fn default() -> Color { - Color { - use_color: UseColor::Never, - atty: false, - style: Style::new(), - } - } -} - impl Color { fn restyle(self, style: Style) -> Color { Color { style, ..self } @@ -36,7 +18,7 @@ impl Color { fn redirect(self, stream: Stream) -> Color { Color { - atty: is_atty(stream), + atty: atty::is(stream), ..self } } @@ -138,3 +120,13 @@ impl Color { self.effective_style().suffix() } } + +impl Default for Color { + fn default() -> Color { + Color { + use_color: UseColor::Never, + atty: false, + style: Style::new(), + } + } +} diff --git a/src/common.rs b/src/common.rs index eff7339..52dd032 100644 --- a/src/common.rs +++ b/src/common.rs @@ -9,6 +9,7 
@@ pub(crate) use std::{ path::{Path, PathBuf}, process, process::Command, + str::Chars, sync::{Mutex, MutexGuard}, usize, vec, }; @@ -16,7 +17,6 @@ pub(crate) use std::{ pub(crate) use edit_distance::edit_distance; pub(crate) use libc::{EXIT_FAILURE, EXIT_SUCCESS}; pub(crate) use log::warn; -pub(crate) use regex::Regex; pub(crate) use tempdir::TempDir; pub(crate) use unicode_width::UnicodeWidthChar; @@ -28,10 +28,12 @@ pub(crate) use crate::{ color::Color, compilation_error::{CompilationError, CompilationErrorKind, CompilationResult}, configuration::Configuration, - cooked_string::CookedString, expression::Expression, fragment::Fragment, - function::{evaluate_function, resolve_function, FunctionContext}, + function::{evaluate_function, resolve_function}, + function_context::FunctionContext, + functions::Functions, + interrupt_guard::InterruptGuard, interrupt_handler::InterruptHandler, justfile::Justfile, lexer::Lexer, @@ -39,11 +41,18 @@ pub(crate) use crate::{ misc::{default, empty}, parameter::Parameter, parser::Parser, - recipe::{Recipe, RecipeContext}, + position::Position, + recipe::Recipe, + recipe_context::RecipeContext, recipe_resolver::RecipeResolver, runtime_error::{RunResult, RuntimeError}, shebang::Shebang, - token::{Token, TokenKind}, + state::State, + string_literal::StringLiteral, + token::Token, + token_kind::TokenKind, + use_color::UseColor, + variables::Variables, verbosity::Verbosity, }; diff --git a/src/compilation_error.rs b/src/compilation_error.rs index 5c53f5a..1d62b2f 100644 --- a/src/compilation_error.rs +++ b/src/compilation_error.rs @@ -7,10 +7,10 @@ pub type CompilationResult<'a, T> = Result>; #[derive(Debug, PartialEq)] pub struct CompilationError<'a> { pub text: &'a str, - pub index: usize, + pub offset: usize, pub line: usize, pub column: usize, - pub width: Option, + pub width: usize, pub kind: CompilationErrorKind<'a>, } @@ -98,8 +98,10 @@ pub enum CompilationErrorKind<'a> { function: &'a str, }, UnknownStartOfToken, + 
UnpairedCarriageReturn, UnterminatedInterpolation, UnterminatedString, + UnterminatedBacktick, } impl<'a> Display for CompilationError<'a> { @@ -277,12 +279,18 @@ impl<'a> Display for CompilationError<'a> { UnknownStartOfToken => { writeln!(f, "Unknown start of token:")?; } + UnpairedCarriageReturn => { + writeln!(f, "Unpaired carriage return")?; + } UnterminatedInterpolation => { writeln!(f, "Unterminated interpolation")?; } UnterminatedString => { writeln!(f, "Unterminated string")?; } + UnterminatedBacktick => { + writeln!(f, "Unterminated backtick")?; + } Internal { ref message } => { writeln!( f, @@ -295,6 +303,13 @@ impl<'a> Display for CompilationError<'a> { write!(f, "{}", message.suffix())?; - write_error_context(f, self.text, self.index, self.line, self.column, self.width) + write_error_context( + f, + self.text, + self.offset, + self.line, + self.column, + self.width, + ) } } diff --git a/src/expression.rs b/src/expression.rs index caa844f..9fd42d7 100644 --- a/src/expression.rs +++ b/src/expression.rs @@ -16,7 +16,7 @@ pub enum Expression<'a> { rhs: Box>, }, String { - cooked_string: CookedString<'a>, + cooked_string: StringLiteral<'a>, }, Variable { name: &'a str, @@ -29,11 +29,11 @@ pub enum Expression<'a> { impl<'a> Expression<'a> { pub fn variables(&'a self) -> Variables<'a> { - Variables { stack: vec![self] } + Variables::new(self) } pub fn functions(&'a self) -> Functions<'a> { - Functions { stack: vec![self] } + Functions::new(self) } } @@ -64,59 +64,3 @@ impl<'a> Display for Expression<'a> { Ok(()) } } - -pub struct Variables<'a> { - stack: Vec<&'a Expression<'a>>, -} - -impl<'a> Iterator for Variables<'a> { - type Item = &'a Token<'a>; - - fn next(&mut self) -> Option<&'a Token<'a>> { - match self.stack.pop() { - None - | Some(Expression::String { .. }) - | Some(Expression::Backtick { .. }) - | Some(Expression::Call { .. }) => None, - Some(Expression::Variable { token, .. 
}) => Some(token), - Some(Expression::Concatination { lhs, rhs }) => { - self.stack.push(lhs); - self.stack.push(rhs); - self.next() - } - Some(Expression::Group { expression }) => { - self.stack.push(expression); - self.next() - } - } - } -} - -pub struct Functions<'a> { - stack: Vec<&'a Expression<'a>>, -} - -impl<'a> Iterator for Functions<'a> { - type Item = (&'a Token<'a>, usize); - - fn next(&mut self) -> Option { - match self.stack.pop() { - None - | Some(Expression::String { .. }) - | Some(Expression::Backtick { .. }) - | Some(Expression::Variable { .. }) => None, - Some(Expression::Call { - token, arguments, .. - }) => Some((token, arguments.len())), - Some(Expression::Concatination { lhs, rhs }) => { - self.stack.push(lhs); - self.stack.push(rhs); - self.next() - } - Some(Expression::Group { expression }) => { - self.stack.push(expression); - self.next() - } - } - } -} diff --git a/src/fragment.rs b/src/fragment.rs index 78057d3..5171331 100644 --- a/src/fragment.rs +++ b/src/fragment.rs @@ -9,7 +9,7 @@ pub enum Fragment<'a> { impl<'a> Fragment<'a> { pub fn continuation(&self) -> bool { match *self { - Fragment::Text { ref text } => text.lexeme.ends_with('\\'), + Fragment::Text { ref text } => text.lexeme().ends_with('\\'), _ => false, } } diff --git a/src/function.rs b/src/function.rs index 039db22..bfcfe0b 100644 --- a/src/function.rs +++ b/src/function.rs @@ -37,13 +37,8 @@ impl Function { } } -pub struct FunctionContext<'a> { - pub invocation_directory: &'a Result, - pub dotenv: &'a BTreeMap, -} - pub fn resolve_function<'a>(token: &Token<'a>, argc: usize) -> CompilationResult<'a, ()> { - let name = token.lexeme; + let name = token.lexeme(); if let Some(function) = FUNCTIONS.get(&name) { use self::Function::*; match (function, argc) { @@ -58,7 +53,7 @@ pub fn resolve_function<'a>(token: &Token<'a>, argc: usize) -> CompilationResult } } else { Err(token.error(CompilationErrorKind::UnknownFunction { - function: token.lexeme, + function: token.lexeme(), 
})) } } diff --git a/src/function_context.rs b/src/function_context.rs new file mode 100644 index 0000000..688eecf --- /dev/null +++ b/src/function_context.rs @@ -0,0 +1,6 @@ +use crate::common::*; + +pub struct FunctionContext<'a> { + pub invocation_directory: &'a Result, + pub dotenv: &'a BTreeMap, +} diff --git a/src/functions.rs b/src/functions.rs new file mode 100644 index 0000000..a315437 --- /dev/null +++ b/src/functions.rs @@ -0,0 +1,36 @@ +use crate::common::*; + +pub struct Functions<'a> { + stack: Vec<&'a Expression<'a>>, +} + +impl<'a> Functions<'a> { + pub fn new(root: &'a Expression<'a>) -> Functions<'a> { + Functions { stack: vec![root] } + } +} + +impl<'a> Iterator for Functions<'a> { + type Item = (&'a Token<'a>, usize); + + fn next(&mut self) -> Option { + match self.stack.pop() { + None + | Some(Expression::String { .. }) + | Some(Expression::Backtick { .. }) + | Some(Expression::Variable { .. }) => None, + Some(Expression::Call { + token, arguments, .. + }) => Some((token, arguments.len())), + Some(Expression::Concatination { lhs, rhs }) => { + self.stack.push(lhs); + self.stack.push(rhs); + self.next() + } + Some(Expression::Group { expression }) => { + self.stack.push(expression); + self.next() + } + } + } +} diff --git a/src/interrupt_guard.rs b/src/interrupt_guard.rs new file mode 100644 index 0000000..74df596 --- /dev/null +++ b/src/interrupt_guard.rs @@ -0,0 +1,16 @@ +use crate::common::*; + +pub struct InterruptGuard; + +impl InterruptGuard { + pub fn new() -> InterruptGuard { + InterruptHandler::instance().block(); + InterruptGuard + } +} + +impl Drop for InterruptGuard { + fn drop(&mut self) { + InterruptHandler::instance().unblock(); + } +} diff --git a/src/interrupt_handler.rs b/src/interrupt_handler.rs index 4f3a8fa..b69279c 100644 --- a/src/interrupt_handler.rs +++ b/src/interrupt_handler.rs @@ -1,7 +1,5 @@ use crate::common::*; -use ctrlc; - pub struct InterruptHandler { blocks: u32, interrupted: bool, @@ -12,7 +10,7 @@ impl 
InterruptHandler { ctrlc::set_handler(|| InterruptHandler::instance().interrupt()) } - fn instance() -> MutexGuard<'static, InterruptHandler> { + pub fn instance() -> MutexGuard<'static, InterruptHandler> { lazy_static! { static ref INSTANCE: Mutex = Mutex::new(InterruptHandler::new()); } @@ -49,11 +47,11 @@ impl InterruptHandler { process::exit(130); } - fn block(&mut self) { + pub fn block(&mut self) { self.blocks += 1; } - fn unblock(&mut self) { + pub fn unblock(&mut self) { if self.blocks == 0 { die!( "{}", @@ -76,18 +74,3 @@ impl InterruptHandler { function() } } - -pub struct InterruptGuard; - -impl InterruptGuard { - fn new() -> InterruptGuard { - InterruptHandler::instance().block(); - InterruptGuard - } -} - -impl Drop for InterruptGuard { - fn drop(&mut self) { - InterruptHandler::instance().unblock(); - } -} diff --git a/src/lexer.rs b/src/lexer.rs index 94421e6..63a11b0 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -3,396 +3,611 @@ use crate::common::*; use CompilationErrorKind::*; use TokenKind::*; -fn re(pattern: &str) -> Regex { - Regex::new(pattern).unwrap() -} - -fn token(pattern: &str) -> Regex { - let mut s = String::new(); - s += r"^(?m)([ \t]*)("; - s += pattern; - s += ")"; - re(&s) -} - -fn mixed_whitespace(text: &str) -> bool { - !(text.chars().all(|c| c == ' ') || text.chars().all(|c| c == '\t')) -} - +/// Just language lexer +/// +/// `self.next` points to the next character to be lexed, and +/// the text between `self.token_start` and `self.token_end` contains +/// the current token being lexed. 
pub struct Lexer<'a> { - tokens: Vec>, + /// Source text text: &'a str, - rest: &'a str, - index: usize, - column: usize, - line: usize, + /// Char iterator + chars: Chars<'a>, + /// Tokens + tokens: Vec>, + /// State stack state: Vec>, -} - -#[derive(PartialEq)] -enum State<'a> { - Start, - Indent(&'a str), - Text, - Interpolation, + /// Current token start + token_start: Position, + /// Current token end + token_end: Position, + /// Next character + next: Option, } impl<'a> Lexer<'a> { - pub fn lex(text: &'a str) -> CompilationResult>> { - let lexer = Lexer { - tokens: vec![], - rest: text, - index: 0, - line: 0, + /// Lex `text` + pub fn lex(text: &str) -> CompilationResult> { + Lexer::new(text).tokenize() + } + + /// Create a new Lexer to lex `text` + fn new(text: &'a str) -> Lexer<'a> { + let mut chars = text.chars(); + let next = chars.next(); + + let start = Position { + offset: 0, column: 0, - state: vec![State::Start], - text, + line: 0, }; - lexer.inner() + Lexer { + state: vec![State::Normal], + tokens: Vec::new(), + token_start: start, + token_end: start, + chars, + next, + text, + } } - fn error(&self, kind: CompilationErrorKind<'a>) -> CompilationError<'a> { + /// Advance over the character in `self.next`, updating + /// `self.token_end` accordingly. + fn advance(&mut self) -> CompilationResult<'a, ()> { + match self.next { + Some(c) => { + let len_utf8 = c.len_utf8(); + + self.token_end.offset += len_utf8; + + match c { + '\n' => { + self.token_end.column = 0; + self.token_end.line += 1; + } + _ => { + self.token_end.column += len_utf8; + } + } + + self.next = self.chars.next(); + + Ok(()) + } + None => Err(self.internal_error("Lexer advanced past end of text")), + } + } + + /// Lexeme of in-progress token + fn lexeme(&self) -> &'a str { + &self.text[self.token_start.offset..self.token_end.offset] + } + + /// Un-lexed text + fn rest(&self) -> &'a str { + &self.text[self.token_end.offset..] 
+ } + + /// Check if unlexed text begins with prefix + fn rest_starts_with(&self, prefix: &str) -> bool { + self.rest().starts_with(prefix) + } + + /// Length of current token + fn current_token_length(&self) -> usize { + self.token_end.offset - self.token_start.offset + } + + /// Get current state + fn state(&self) -> CompilationResult<'a, State<'a>> { + if self.state.is_empty() { + Err(self.internal_error("Lexer state stack empty")) + } else { + Ok(self.state[self.state.len() - 1]) + } + } + + /// Pop current state from stack + fn pop_state(&mut self) -> CompilationResult<'a, ()> { + if self.state.pop().is_none() { + Err(self.internal_error("Lexer attempted to pop in start state")) + } else { + Ok(()) + } + } + + /// Create a new token with `kind` whose lexeme + /// is between `self.token_start` and `self.token_end` + fn token(&mut self, kind: TokenKind) { + self.tokens.push(Token { + offset: self.token_start.offset, + column: self.token_start.column, + line: self.token_start.line, + text: self.text, + length: self.token_end.offset - self.token_start.offset, + kind, + }); + + // Set `token_start` to point after the lexed token + self.token_start = self.token_end; + } + + /// Create an internal error with `message` + fn internal_error(&self, message: impl Into) -> CompilationError<'a> { + // Use `self.token_end` as the location of the error CompilationError { text: self.text, - index: self.index, - line: self.line, - column: self.column, - width: None, - kind, + offset: self.token_end.offset, + line: self.token_end.line, + column: self.token_end.column, + width: 0, + kind: CompilationErrorKind::Internal { + message: message.into(), + }, } } - fn token(&self, prefix: &'a str, lexeme: &'a str, kind: TokenKind) -> Token<'a> { - Token { - index: self.index, - line: self.line, - column: self.column, + /// Create a compilation error with `kind` + fn error(&self, kind: CompilationErrorKind<'a>) -> CompilationError<'a> { + // Use the in-progress token span as the location 
of the error. + + // The width of the error site to highlight depends on the kind of error: + let width = match kind { + // highlight ' or " + UnterminatedString => 1, + // highlight ` + UnterminatedBacktick => 1, + // highlight the full token + _ => self.lexeme().len(), + }; + + CompilationError { text: self.text, - prefix, - lexeme, + offset: self.token_start.offset, + line: self.token_start.line, + column: self.token_start.column, + width, kind, } } - fn lex_indent(&mut self) -> CompilationResult<'a, Option>> { - lazy_static! { - static ref INDENT: Regex = re(r"^([ \t]*)[^ \t\n\r]"); + fn unterminated_interpolation_error( + &self, + interpolation_start: Position, + ) -> CompilationError<'a> { + CompilationError { + text: self.text, + offset: interpolation_start.offset, + line: interpolation_start.line, + column: interpolation_start.column, + width: 2, + kind: UnterminatedInterpolation, } - - let indentation = INDENT - .captures(self.rest) - .map(|captures| captures.get(1).unwrap().as_str()); - - if self.column == 0 { - if let Some(kind) = match (self.state.last().unwrap(), indentation) { - // ignore: was no indentation and there still isn't - // or current line is blank - (&State::Start, Some("")) | (_, None) => None, - // indent: was no indentation, now there is - (&State::Start, Some(current)) => { - if mixed_whitespace(current) { - return Err(self.error(MixedLeadingWhitespace { - whitespace: current, - })); - } - //indent = Some(current); - self.state.push(State::Indent(current)); - Some(Indent) - } - // dedent: there was indentation and now there isn't - (&State::Indent(_), Some("")) => { - // indent = None; - self.state.pop(); - Some(Dedent) - } - // was indentation and still is, check if the new indentation matches - (&State::Indent(previous), Some(current)) => { - if !current.starts_with(previous) { - return Err(self.error(InconsistentLeadingWhitespace { - expected: previous, - found: current, - })); - } - None - } - // at column 0 in some other state: 
this should never happen - (&State::Text, _) | (&State::Interpolation, _) => { - return Err(self.error(Internal { - message: "unexpected state at column 0".to_string(), - })); - } - } { - return Ok(Some(self.token("", "", kind))); - } - } - Ok(None) } - pub fn inner(mut self) -> CompilationResult<'a, Vec>> { - lazy_static! { - static ref AT: Regex = token(r"@"); - static ref BACKTICK: Regex = token(r"`[^`\n\r]*`"); - static ref COLON: Regex = token(r":"); - static ref COMMA: Regex = token(r","); - static ref COMMENT: Regex = token(r"#([^\n\r][^\n\r]*)?\r?$"); - static ref EOF: Regex = token(r"\z"); - static ref EOL: Regex = token(r"\n|\r\n"); - static ref EQUALS: Regex = token(r"="); - static ref INTERPOLATION_END: Regex = token(r"[}][}]"); - static ref INTERPOLATION_START_TOKEN: Regex = token(r"[{][{]"); - static ref NAME: Regex = token(r"([a-zA-Z_][a-zA-Z0-9_-]*)"); - static ref PAREN_L: Regex = token(r"[(]"); - static ref PAREN_R: Regex = token(r"[)]"); - static ref PLUS: Regex = token(r"[+]"); - static ref RAW_STRING: Regex = token(r#"'[^']*'"#); - static ref STRING: Regex = token(r#"["]"#); - static ref UNTERMINATED_RAW_STRING: Regex = token(r#"'[^']*"#); - static ref INTERPOLATION_START: Regex = re(r"^[{][{]"); - static ref LEADING_TEXT: Regex = re(r"^(?m)(.+?)[{][{]"); - static ref LINE: Regex = re(r"^(?m)[ \t]+[^ \t\n\r].*$"); - static ref TEXT: Regex = re(r"^(?m)(.+)"); - } - + /// Consume the text and produce a series of tokens + fn tokenize(mut self) -> CompilationResult<'a, Vec>> { loop { - if let Some(token) = self.lex_indent()? { - self.tokens.push(token); + if self.token_start.column == 0 { + self.lex_line_start()?; } - // insert a dedent if we're indented and we hit the end of the file - if &State::Start != self.state.last().unwrap() && EOF.is_match(self.rest) { - let token = self.token("", "", Dedent); - self.tokens.push(token); + match self.next { + Some(first) => match self.state()? 
{ + State::Normal => self.lex_normal(first)?, + State::Interpolation { + interpolation_start, + } => self.lex_interpolation(interpolation_start, first)?, + State::Text => self.lex_text()?, + State::Indented { .. } => self.lex_indented()?, + }, + None => break, } - - let (prefix, lexeme, kind) = if let (0, &State::Indent(indent), Some(captures)) = ( - self.column, - self.state.last().unwrap(), - LINE.captures(self.rest), - ) { - let line = captures.get(0).unwrap().as_str(); - if !line.starts_with(indent) { - return Err(self.error(Internal { - message: "unexpected indent".to_string(), - })); - } - self.state.push(State::Text); - (&line[0..indent.len()], "", Line) - } else if let Some(captures) = EOF.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Eof, - ) - } else if let State::Text = *self.state.last().unwrap() { - if let Some(captures) = INTERPOLATION_START.captures(self.rest) { - self.state.push(State::Interpolation); - ("", captures.get(0).unwrap().as_str(), InterpolationStart) - } else if let Some(captures) = LEADING_TEXT.captures(self.rest) { - ("", captures.get(1).unwrap().as_str(), Text) - } else if let Some(captures) = TEXT.captures(self.rest) { - ("", captures.get(1).unwrap().as_str(), Text) - } else if let Some(captures) = EOL.captures(self.rest) { - self.state.pop(); - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Eol, - ) - } else { - return Err(self.error(Internal { - message: format!("Could not match token in text state: \"{}\"", self.rest), - })); - } - } else if let Some(captures) = INTERPOLATION_START_TOKEN.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - InterpolationStart, - ) - } else if let Some(captures) = INTERPOLATION_END.captures(self.rest) { - if self.state.last().unwrap() == &State::Interpolation { - self.state.pop(); - } - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - 
InterpolationEnd, - ) - } else if let Some(captures) = NAME.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Name, - ) - } else if let Some(captures) = EOL.captures(self.rest) { - if self.state.last().unwrap() == &State::Interpolation { - return Err(self.error(UnterminatedInterpolation)); - } - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Eol, - ) - } else if let Some(captures) = BACKTICK.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Backtick, - ) - } else if let Some(captures) = COLON.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Colon, - ) - } else if let Some(captures) = AT.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - At, - ) - } else if let Some(captures) = COMMA.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Comma, - ) - } else if let Some(captures) = PAREN_L.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - ParenL, - ) - } else if let Some(captures) = PAREN_R.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - ParenR, - ) - } else if let Some(captures) = PLUS.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Plus, - ) - } else if let Some(captures) = EQUALS.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Equals, - ) - } else if let Some(captures) = COMMENT.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - Comment, - ) - } else if let Some(captures) = RAW_STRING.captures(self.rest) { - ( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - RawString, - ) - } else if 
UNTERMINATED_RAW_STRING.is_match(self.rest) { - return Err(self.error(UnterminatedString)); - } else if let Some(captures) = STRING.captures(self.rest) { - let prefix = captures.get(1).unwrap().as_str(); - let contents = &self.rest[prefix.len() + 1..]; - if contents.is_empty() { - return Err(self.error(UnterminatedString)); - } - let mut len = 0; - let mut escape = false; - for c in contents.chars() { - if c == '\n' || c == '\r' { - return Err(self.error(UnterminatedString)); - } else if !escape && c == '"' { - break; - } else if !escape && c == '\\' { - escape = true; - } else if escape { - escape = false; - } - len += c.len_utf8(); - } - let start = prefix.len(); - let content_end = start + len + 1; - if escape || content_end >= self.rest.len() { - return Err(self.error(UnterminatedString)); - } - (prefix, &self.rest[start..=content_end], StringToken) - } else { - return Err(self.error(UnknownStartOfToken)); - }; - - let token = self.token(prefix, lexeme, kind); - self.tokens.push(token); - - let len = prefix.len() + lexeme.len(); - - if len == 0 { - let last = self.tokens.last().unwrap(); - match last.kind { - Eof => {} - _ => { - return Err(last.error(Internal { - message: format!("zero length token: {:?}", last), - })); - } - } - } - - match self.tokens.last().unwrap().kind { - Eol => { - self.line += 1; - self.column = 0; - } - Eof => { - break; - } - RawString => { - let lexeme_lines = lexeme.lines().count(); - self.line += lexeme_lines - 1; - if lexeme_lines == 1 { - self.column += len; - } else { - self.column = lexeme.lines().last().unwrap().len(); - } - } - _ => { - self.column += len; - } - } - - self.rest = &self.rest[len..]; - self.index += len; } + if let State::Interpolation { + interpolation_start, + } = self.state()? + { + return Err(self.unterminated_interpolation_error(interpolation_start)); + } + + if let State::Indented { .. } | State::Text = self.state()? 
{ + self.token(Dedent); + } + + self.token(Eof); + Ok(self.tokens) } + + /// Handle blank lines and indentation + fn lex_line_start(&mut self) -> CompilationResult<'a, ()> { + let nonblank_index = self + .rest() + .char_indices() + .skip_while(|&(_, c)| c == ' ' || c == '\t') + .map(|(i, _)| i) + .next() + .unwrap_or_else(|| self.rest().len()); + + let rest = &self.rest()[nonblank_index..]; + + // Handle blank line + if rest.starts_with('\n') || rest.starts_with("\r\n") || rest.is_empty() { + while let Some(' ') | Some('\t') = self.next { + self.advance()?; + } + + // Lex a whitespace token if the blank line was nonempty + if self.current_token_length() > 0 { + self.token(Whitespace); + }; + + return Ok(()); + } + + // Handle nonblank lines with no leading whitespace + if self.next != Some(' ') && self.next != Some('\t') { + if let State::Indented { .. } = self.state()? { + self.token(Dedent); + self.pop_state()?; + } + + return Ok(()); + } + + // Handle continued indentation + if let State::Indented { indentation } = self.state()? { + let mut remaining = indentation.len(); + + // Advance over whitespace up to length of current indentation + while let Some(' ') | Some('\t') = self.next { + self.advance()?; + remaining -= 1; + if remaining == 0 { + break; + } + } + + let lexeme = self.lexeme(); + + if lexeme != indentation { + return Err(self.error(InconsistentLeadingWhitespace { + expected: indentation, + found: lexeme, + })); + } + + // Indentation matches, lex as whitespace + self.token(Whitespace); + + return Ok(()); + } + + if self.state()? 
!= State::Normal { + return Err(self.internal_error(format!( + "Lexer::lex_line_start called in unexpected state: {:?}", + self.state() + ))); + } + + // Handle new indentation + while let Some(' ') | Some('\t') = self.next { + self.advance()?; + } + + let indentation = self.lexeme(); + + let spaces = indentation.chars().any(|c| c == ' '); + let tabs = indentation.chars().any(|c| c == '\t'); + + if spaces && tabs { + return Err(self.error(MixedLeadingWhitespace { + whitespace: indentation, + })); + } + + self.state.push(State::Indented { indentation }); + + self.token(Indent); + + Ok(()) + } + + /// Lex token beginning with `start` in normal state + fn lex_normal(&mut self, start: char) -> CompilationResult<'a, ()> { + match start { + '@' => self.lex_single(At), + '=' => self.lex_single(Equals), + ',' => self.lex_single(Comma), + ':' => self.lex_single(Colon), + '(' => self.lex_single(ParenL), + ')' => self.lex_single(ParenR), + '{' => self.lex_brace_l(), + '}' => self.lex_brace_r(), + '+' => self.lex_single(Plus), + '\n' => self.lex_single(Eol), + '\r' => self.lex_cr_lf(), + '#' => self.lex_comment(), + '`' => self.lex_backtick(), + ' ' | '\t' => self.lex_whitespace(), + '\'' => self.lex_raw_string(), + '"' => self.lex_cooked_string(), + 'a'...'z' | 'A'...'Z' | '_' => self.lex_name(), + _ => { + self.advance()?; + Err(self.error(UnknownStartOfToken)) + } + } + } + + /// Lex token beginning with `start` in interpolation state + fn lex_interpolation( + &mut self, + interpolation_start: Position, + start: char, + ) -> CompilationResult<'a, ()> { + // Check for end of interpolation + if self.rest_starts_with("}}") { + // Pop interpolation state + self.pop_state()?; + // Emit interpolation end token + self.lex_double(InterpolationEnd) + } else if self.rest_starts_with("\n") || self.rest_starts_with("\r\n") { + // Return unterminated interpolation error that highlights the opening {{ + Err(self.unterminated_interpolation_error(interpolation_start)) + } else { + // 
Otherwise lex as if we are in normal state + self.lex_normal(start) + } + } + + /// Lex token beginning with `start` in text state + fn lex_text(&mut self) -> CompilationResult<'a, ()> { + enum Terminator { + Newline, + NewlineCarriageReturn, + Interpolation, + EndOfFile, + } + + use Terminator::*; + + let terminator = loop { + if let Some('\n') = self.next { + break Newline; + } + + if self.rest_starts_with("\r\n") { + break NewlineCarriageReturn; + } + + if self.rest_starts_with("{{") { + break Interpolation; + } + + if self.next.is_none() { + break EndOfFile; + } + + self.advance()?; + }; + + // emit text token containing text so far + if self.current_token_length() > 0 { + self.token(Text); + } + + match terminator { + Newline => { + self.state.pop(); + self.lex_single(Eol) + } + NewlineCarriageReturn => { + self.state.pop(); + self.lex_double(Eol) + } + Interpolation => { + self.state.push(State::Interpolation { + interpolation_start: self.token_start, + }); + self.lex_double(InterpolationStart) + } + EndOfFile => self.pop_state(), + } + } + + /// Lex token beginning with `start` in indented state + fn lex_indented(&mut self) -> CompilationResult<'a, ()> { + self.state.push(State::Text); + self.token(Line); + Ok(()) + } + + /// Lex a single character token + fn lex_single(&mut self, kind: TokenKind) -> CompilationResult<'a, ()> { + self.advance()?; + self.token(kind); + Ok(()) + } + + /// Lex a double character token + fn lex_double(&mut self, kind: TokenKind) -> CompilationResult<'a, ()> { + self.advance()?; + self.advance()?; + self.token(kind); + Ok(()) + } + + /// Lex a token starting with '{' + fn lex_brace_l(&mut self) -> CompilationResult<'a, ()> { + if !self.rest_starts_with("{{") { + self.advance()?; + + return Err(self.error(UnknownStartOfToken)); + } + + self.lex_double(InterpolationStart) + } + + /// Lex a token starting with '}' + fn lex_brace_r(&mut self) -> CompilationResult<'a, ()> { + if !self.rest_starts_with("}}") { + self.advance()?; + + 
return Err(self.error(UnknownStartOfToken)); + } + + self.lex_double(InterpolationEnd) + } + + /// Lex a carriage return and line feed + fn lex_cr_lf(&mut self) -> CompilationResult<'a, ()> { + if !self.rest_starts_with("\r\n") { + // advance over \r + self.advance()?; + + return Err(self.error(UnpairedCarriageReturn)); + } + + self.lex_double(Eol) + } + + /// Lex name: [a-zA-Z_][a-zA-Z0-9_]* + fn lex_name(&mut self) -> CompilationResult<'a, ()> { + while let Some('a'...'z') | Some('A'...'Z') | Some('0'...'9') | Some('_') | Some('-') = + self.next + { + self.advance()?; + } + + self.token(Name); + + Ok(()) + } + + /// Lex comment: #[^\r\n] + fn lex_comment(&mut self) -> CompilationResult<'a, ()> { + // advance over # + self.advance()?; + + loop { + if let Some('\r') | Some('\n') | None = self.next { + break; + } + + self.advance()?; + } + + self.token(Comment); + + Ok(()) + } + + /// Lex backtick: `[^\r\n]*` + fn lex_backtick(&mut self) -> CompilationResult<'a, ()> { + // advance over ` + self.advance()?; + + loop { + if let Some('\r') | Some('\n') | None = self.next { + return Err(self.error(UnterminatedBacktick)); + } + + if let Some('`') = self.next { + self.advance()?; + break; + } + + self.advance()?; + } + + self.token(Backtick); + + Ok(()) + } + + /// Lex whitespace: [ \t]+ + fn lex_whitespace(&mut self) -> CompilationResult<'a, ()> { + while let Some(' ') | Some('\t') = self.next { + self.advance()? 
+ } + + self.token(Whitespace); + + Ok(()) + } + + /// Lex raw string: '[^']*' + fn lex_raw_string(&mut self) -> CompilationResult<'a, ()> { + // advance over opening ' + self.advance()?; + + loop { + match self.next { + Some('\'') => break, + None => return Err(self.error(UnterminatedString)), + _ => {} + } + + self.advance()?; + } + + // advance over closing ' + self.advance()?; + + self.token(StringRaw); + + Ok(()) + } + + /// Lex cooked string: "[^"\n\r]*" (also processes escape sequences) + fn lex_cooked_string(&mut self) -> CompilationResult<'a, ()> { + // advance over opening " + self.advance()?; + + let mut escape = false; + + loop { + match self.next { + Some('\r') | Some('\n') | None => return Err(self.error(UnterminatedString)), + Some('"') if !escape => break, + Some('\\') if !escape => escape = true, + _ => escape = false, + } + + self.advance()?; + } + + // advance over closing " + self.advance()?; + + self.token(StringCooked); + + Ok(()) + } } #[cfg(test)] -mod test { +mod tests { use super::*; + use crate::testing::token_summary; + macro_rules! summary_test { ($name:ident, $input:expr, $expected:expr $(,)*) => { #[test] fn $name() { let input = $input; let expected = $expected; - let tokens = crate::lexer::Lexer::lex(input).unwrap(); + let tokens = Lexer::lex(input).unwrap(); let roundtrip = tokens .iter() - .map(|t| { - let mut s = String::new(); - s += t.prefix; - s += t.lexeme; - s - }) - .collect::>() + .map(Token::lexeme) + .collect::>() .join(""); let actual = token_summary(&tokens); if actual != expected { @@ -406,40 +621,11 @@ mod test { }; } - fn token_summary(tokens: &[Token]) -> String { - tokens - .iter() - .map(|t| match t.kind { - At => "@", - Backtick => "`", - Colon => ":", - Comma => ",", - Comment { .. } => "#", - Dedent => "<", - Eof => ".", - Eol => "$", - Equals => "=", - Indent { .. } => ">", - InterpolationEnd => "}", - InterpolationStart => "{", - Line { .. 
} => "^", - Name => "N", - ParenL => "(", - ParenR => ")", - Plus => "+", - RawString => "'", - StringToken => "\"", - Text => "_", - }) - .collect::>() - .join("") - } - macro_rules! error_test { ( name: $name:ident, input: $input:expr, - index: $index:expr, + offset: $offset:expr, line: $line:expr, column: $column:expr, width: $width:expr, @@ -451,7 +637,7 @@ mod test { let expected = CompilationError { text: input, - index: $index, + offset: $offset, line: $line, column: $column, width: $width, @@ -460,7 +646,7 @@ mod test { if let Err(error) = Lexer::lex(input) { assert_eq!(error.text, expected.text); - assert_eq!(error.index, expected.index); + assert_eq!(error.offset, expected.offset); assert_eq!(error.line, expected.line); assert_eq!(error.column, expected.column); assert_eq!(error.kind, expected.kind); @@ -472,10 +658,159 @@ mod test { }; } + summary_test! { + name, + "foo", + "N.", + } + + summary_test! { + comment, + "# hello", + "#.", + } + + summary_test! { + backtick, + "`echo`", + "`.", + } + + summary_test! { + raw_string, + "'hello'", + "'.", + } + + summary_test! { + cooked_string, + r#""hello""#, + r#""."#, + } + + summary_test! { + export_concatination, + "export foo = 'foo' + 'bar'", + "N N = ' + '.", + } + + summary_test! { + export_complex, + "export foo = ('foo' + 'bar') + `baz`", + "N N = (' + ') + `.", + } + + summary_test! { + eol_linefeed, + "\n", + "$.", + } + + summary_test! { + eol_carriage_return_linefeed, + "\r\n", + "$.", + } + + summary_test! { + indented_line, + "foo:\n a", + "N:$>^_<.", + } + + summary_test! { + indented_block, + r##"foo: + a + b + c +"##, + "N:$>^_$ ^_$ ^_$<.", + } + + summary_test! { + indented_block_followed_by_item, + "foo: + a +b:", + "N:$>^_$^_$^$^_$<.", + } + + summary_test! { + indented_blocks, + " +b: a + @mv a b + +a: + @touch F + @touch a + +d: c + @rm c + +c: b + @mv b c", + "$N: N$>^_$^$^_$ ^_$^$^_$^$^_<.", + } + + summary_test! 
{ + interpolation_empty, + "hello:\n echo {{}}", + "N:$>^_{}<.", + } + + summary_test! { + interpolation_expression, + "hello:\n echo {{`echo hello` + `echo goodbye`}}", + "N:$>^_{` + `}<.", + } + + summary_test! { + tokenize_names, + "\ +foo +bar-bob +b-bob_asdfAAAA +test123", + "N$N$N$N.", + } + + summary_test! { + tokenize_indented_line, + "foo:\n a", + "N:$>^_<.", + } + + summary_test! { + tokenize_indented_block, + r##"foo: + a + b + c +"##, + "N:$>^_$ ^_$ ^_$<.", + } + summary_test! { tokenize_strings, r#"a = "'a'" + '"b"' + "'c'" + '"d"'#echo hello"#, - r#"N="+'+"+'#."#, + r#"N = " + ' + " + '#."#, } summary_test! { @@ -483,7 +818,7 @@ mod test { "foo: # some comment {{hello}} ", - "N:#$>^{N}$<.", + "N: #$>^{N}$<.", } summary_test! { @@ -492,13 +827,13 @@ mod test { {{hello}} # another comment ", - "N:#$>^{N}$<#$.", + "N: #$>^{N}$<#$.", } summary_test! { tokenize_recipe_complex_interpolation_expression, "foo: #lol\n {{a + b + \"z\" + blarg}}", - "N:#$>^{N+N+\"+N}<.", + "N: #$>^{N + N + \" + N}<.", } summary_test! { @@ -513,7 +848,7 @@ mod test { hello blah blah blah : a b c #whatever ", - "N$$NNNN:NNN#$.", + "N$$N N N N : N N N #$ .", } summary_test! { @@ -530,7 +865,7 @@ hello: # yolo ", - "$#$N:$>^_$^_$$^_$$^_$$<#$.", + "$#$N:$>^_$ ^_$^$ ^_$^$ ^_$^$<#$ .", } summary_test! { @@ -541,19 +876,25 @@ A='1' echo: echo {{A}} ", - "$#$N='$N:$>^_{N}$<.", + "$#$N='$N:$>^_{N}$ <.", } summary_test! { tokenize_interpolation_backticks, "hello:\n echo {{`echo hello` + `echo goodbye`}}", - "N:$>^_{`+`}<.", + "N:$>^_{` + `}<.", + } + + summary_test! { + tokenize_empty_interpolation, + "hello:\n echo {{}}", + "N:$>^_{}<.", } summary_test! { tokenize_assignment_backticks, "a = `echo hello` + `echo goodbye`", - "N=`+`.", + "N = ` + `.", } summary_test! { @@ -570,9 +911,9 @@ hello: # hello bob: frank - ", + \t", - "$N:$>^_$^_$$^_$$^_$$<#$N:$>^_$<.", + "$N:$>^_$ ^_$^$ ^_$^$ ^_$^$<#$N:$>^_$ <.", } summary_test! 
{ @@ -602,13 +943,13 @@ d: c c: b @mv b c", - "$N:N$>^_$$^_$^_$$^_$$^_<.", + "$N: N$>^_$^$^_$ ^_$^$^_$^$^_<.", } summary_test! { tokenize_parens, r"((())) )abc(+", - "((())))N(+.", + "((())) )N(+.", } summary_test! { @@ -630,10 +971,10 @@ c: b 1 \t2 ", - index: 9, + offset: 9, line: 3, column: 0, - width: None, + width: 1, kind: InconsistentLeadingWhitespace{expected: " ", found: "\t"}, } @@ -644,71 +985,121 @@ c: b \t\t 1 \t 2 ", - index: 12, + offset: 12, line: 3, column: 0, - width: None, - kind: InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "}, + width: 2, + kind: InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "}, } error_test! { - name: tokenize_unknown, - input: "~", - index: 0, + name: tokenize_unknown, + input: "~", + offset: 0, line: 0, column: 0, - width: None, + width: 1, kind: UnknownStartOfToken, } error_test! { - name: unterminated_string, - input: r#"a = ""#, - index: 3, - line: 0, - column: 3, - width: None, - kind: UnterminatedString, - } - - error_test! { - name: unterminated_string_with_escapes, - input: r#"a = "\n\t\r\"\\"#, - index: 3, - line: 0, - column: 3, - width: None, - kind: UnterminatedString, - } - - error_test! { - name: unterminated_raw_string, - input: "r a='asdf", - index: 4, + name: unterminated_string_with_escapes, + input: r#"a = "\n\t\r\"\\"#, + offset: 4, line: 0, column: 4, - width: None, + width: 1, kind: UnterminatedString, } error_test! { - name: unterminated_interpolation, - input: "foo:\n echo {{ + name: unterminated_raw_string, + input: "r a='asdf", + offset: 4, + line: 0, + column: 4, + width: 1, + kind: UnterminatedString, + } + + error_test! { + name: unterminated_interpolation, + input: "foo:\n echo {{ ", - index: 13, + offset: 11, line: 1, - column: 8, - width: None, + column: 6, + width: 2, kind: UnterminatedInterpolation, } + error_test! { + name: unterminated_backtick, + input: "`echo", + offset: 0, + line: 0, + column: 0, + width: 1, + kind: UnterminatedBacktick, + } + + error_test! 
{ + name: unpaired_carriage_return, + input: "foo\rbar", + offset: 3, + line: 0, + column: 3, + width: 1, + kind: UnpairedCarriageReturn, + } + + error_test! { + name: unknown_start_of_token_ampersand, + input: " \r\n&", + offset: 3, + line: 1, + column: 0, + width: 1, + kind: UnknownStartOfToken, + } + + error_test! { + name: unknown_start_of_token_tilde, + input: "~", + offset: 0, + line: 0, + column: 0, + width: 1, + kind: UnknownStartOfToken, + } + + error_test! { + name: unterminated_string, + input: r#"a = ""#, + offset: 4, + line: 0, + column: 4, + width: 1, + kind: UnterminatedString, + } + error_test! { name: mixed_leading_whitespace, input: "a:\n\t echo hello", - index: 3, + offset: 3, line: 1, column: 0, - width: None, + width: 2, kind: MixedLeadingWhitespace{whitespace: "\t "}, } + + error_test! { + name: unclosed_interpolation_delimiter, + input: "a:\n echo {{ foo", + offset: 9, + line: 1, + column: 6, + width: 2, + kind: UnterminatedInterpolation, + } } diff --git a/src/lib.rs b/src/lib.rs index 4348141..12afb09 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,10 +20,12 @@ mod command_ext; mod common; mod compilation_error; mod configuration; -mod cooked_string; mod expression; mod fragment; mod function; +mod function_context; +mod functions; +mod interrupt_guard; mod interrupt_handler; mod justfile; mod lexer; @@ -32,13 +34,20 @@ mod misc; mod parameter; mod parser; mod platform; +mod position; mod range_ext; mod recipe; +mod recipe_context; mod recipe_resolver; mod run; mod runtime_error; mod shebang; +mod state; +mod string_literal; mod token; +mod token_kind; +mod use_color; +mod variables; mod verbosity; pub use crate::run::run; diff --git a/src/misc.rs b/src/misc.rs index a5b64c0..dcad3d9 100644 --- a/src/misc.rs +++ b/src/misc.rs @@ -58,11 +58,13 @@ pub fn conjoin( pub fn write_error_context( f: &mut Formatter, text: &str, - index: usize, + offset: usize, line: usize, column: usize, - width: Option, + width: usize, ) -> Result<(), fmt::Error> { 
+ let width = if width == 0 { 1 } else { width }; + let line_number = line + 1; let red = Color::fmt(f).error(); match text.lines().nth(line) { @@ -77,14 +79,14 @@ pub fn write_error_context( if i < column { space_column += 4; } - if i >= column && i < column + width.unwrap_or(1) { + if i >= column && i < column + width { space_width += 4; } } else { if i < column { space_column += UnicodeWidthChar::width(c).unwrap_or(0); } - if i >= column && i < column + width.unwrap_or(1) { + if i >= column && i < column + width { space_width += UnicodeWidthChar::width(c).unwrap_or(0); } space_line.push(c); @@ -95,30 +97,19 @@ pub fn write_error_context( writeln!(f, "{0:1$} |", "", line_number_width)?; writeln!(f, "{} | {}", line_number, space_line)?; write!(f, "{0:1$} |", "", line_number_width)?; - if width == None { - write!( - f, - " {0:1$}{2}^{3}", - "", - space_column, - red.prefix(), - red.suffix() - )?; - } else { - write!( - f, - " {0:1$}{2}{3:^<4$}{5}", - "", - space_column, - red.prefix(), - "", - space_width, - red.suffix() - )?; - } + write!( + f, + " {0:1$}{2}{3:^<4$}{5}", + "", + space_column, + red.prefix(), + "", + space_width, + red.suffix() + )?; } None => { - if index != text.len() { + if offset != text.len() { write!( f, "internal error: Error has invalid line number: {}", diff --git a/src/parser.rs b/src/parser.rs index d61a9fa..6600ba0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -16,7 +16,8 @@ pub struct Parser<'a> { impl<'a> Parser<'a> { pub fn parse(text: &'a str) -> CompilationResult<'a, Justfile> { - let tokens = Lexer::lex(text)?; + let mut tokens = Lexer::lex(text)?; + tokens.retain(|token| token.kind != Whitespace); let parser = Parser::new(text, tokens); parser.justfile() } @@ -87,7 +88,7 @@ impl<'a> Parser<'a> { doc: Option>, quiet: bool, ) -> CompilationResult<'a, ()> { - if let Some(recipe) = self.recipes.get(name.lexeme) { + if let Some(recipe) = self.recipes.get(name.lexeme()) { return Err(name.error(DuplicateRecipe { recipe: recipe.name, 
first: recipe.line_number, @@ -115,14 +116,14 @@ impl<'a> Parser<'a> { if parsed_variadic_parameter { return Err(parameter.error(ParameterFollowsVariadicParameter { - parameter: parameter.lexeme, + parameter: parameter.lexeme(), })); } - if parameters.iter().any(|p| p.name == parameter.lexeme) { + if parameters.iter().any(|p| p.name == parameter.lexeme()) { return Err(parameter.error(DuplicateParameter { - recipe: name.lexeme, - parameter: parameter.lexeme, + recipe: name.lexeme(), + parameter: parameter.lexeme(), })); } @@ -135,7 +136,7 @@ impl<'a> Parser<'a> { if parsed_parameter_with_default && default.is_none() { return Err(parameter.error(RequiredParameterFollowsDefaultParameter { - parameter: parameter.lexeme, + parameter: parameter.lexeme(), })); } @@ -143,7 +144,7 @@ impl<'a> Parser<'a> { parsed_variadic_parameter = variadic; parameters.push(Parameter { - name: parameter.lexeme, + name: parameter.lexeme(), token: parameter, default, variadic, @@ -163,13 +164,13 @@ impl<'a> Parser<'a> { let mut dependencies = vec![]; let mut dependency_tokens = vec![]; while let Some(dependency) = self.accept(Name) { - if dependencies.contains(&dependency.lexeme) { + if dependencies.contains(&dependency.lexeme()) { return Err(dependency.error(DuplicateDependency { - recipe: name.lexeme, - dependency: dependency.lexeme, + recipe: name.lexeme(), + dependency: dependency.lexeme(), })); } - dependencies.push(dependency.lexeme); + dependencies.push(dependency.lexeme()); dependency_tokens.push(dependency); } @@ -197,7 +198,7 @@ impl<'a> Parser<'a> { if let Some(token) = self.accept(Text) { if fragments.is_empty() { if lines.is_empty() { - if token.lexeme.starts_with("#!") { + if token.lexeme().starts_with("#!") { shebang = true; } } else if !shebang @@ -206,7 +207,7 @@ impl<'a> Parser<'a> { .and_then(|line| line.last()) .map(Fragment::continuation) .unwrap_or(false) - && (token.lexeme.starts_with(' ') || token.lexeme.starts_with('\t')) + && (token.lexeme().starts_with(' ') || 
token.lexeme().starts_with('\t')) { return Err(token.error(ExtraLeadingWhitespace)); } @@ -234,12 +235,12 @@ impl<'a> Parser<'a> { } self.recipes.insert( - name.lexeme, + name.lexeme(), Recipe { line_number: name.line, - name: name.lexeme, - doc: doc.map(|t| t.lexeme[1..].trim()), - private: &name.lexeme[0..1] == "_", + name: name.lexeme(), + doc: doc.map(|t| t.lexeme()[1..].trim()), + private: &name.lexeme()[0..1] == "_", dependencies, dependency_tokens, lines, @@ -263,26 +264,26 @@ impl<'a> Parser<'a> { } let arguments = self.arguments()?; if let Some(token) = self.expect(ParenR) { - return Err(self.unexpected_token(&token, &[Name, StringToken, ParenR])); + return Err(self.unexpected_token(&token, &[Name, StringCooked, ParenR])); } Ok(Expression::Call { - name: first.lexeme, + name: first.lexeme(), token: first, arguments, }) } else { Ok(Expression::Variable { - name: first.lexeme, + name: first.lexeme(), token: first, }) } } Backtick => Ok(Expression::Backtick { - raw: &first.lexeme[1..first.lexeme.len() - 1], + raw: &first.lexeme()[1..first.lexeme().len() - 1], token: first, }), - RawString | StringToken => Ok(Expression::String { - cooked_string: CookedString::new(&first)?, + StringRaw | StringCooked => Ok(Expression::String { + cooked_string: StringLiteral::new(&first)?, }), ParenL => { let expression = self.expression()?; @@ -295,7 +296,7 @@ impl<'a> Parser<'a> { expression: Box::new(expression), }) } - _ => Err(self.unexpected_token(&first, &[Name, StringToken])), + _ => Err(self.unexpected_token(&first, &[Name, StringCooked])), } } @@ -333,13 +334,13 @@ impl<'a> Parser<'a> { } fn assignment(&mut self, name: Token<'a>, export: bool) -> CompilationResult<'a, ()> { - if self.assignments.contains_key(name.lexeme) { + if self.assignments.contains_key(name.lexeme()) { return Err(name.error(DuplicateVariable { - variable: name.lexeme, + variable: name.lexeme(), })); } if export { - self.exports.insert(name.lexeme); + self.exports.insert(name.lexeme()); } let 
expression = self.expression()?; @@ -347,14 +348,14 @@ impl<'a> Parser<'a> { return Err(self.unexpected_token(&token, &[Plus, Eol])); } - self.assignments.insert(name.lexeme, expression); - self.assignment_tokens.insert(name.lexeme, name); + self.assignments.insert(name.lexeme(), expression); + self.assignment_tokens.insert(name.lexeme(), name); Ok(()) } fn alias(&mut self, name: Token<'a>) -> CompilationResult<'a, ()> { // Make sure alias doesn't already exist - if let Some(alias) = self.aliases.get(name.lexeme) { + if let Some(alias) = self.aliases.get(name.lexeme()) { return Err(name.error(DuplicateAlias { alias: alias.name, first: alias.line_number, @@ -363,7 +364,7 @@ impl<'a> Parser<'a> { // Make sure the next token is of kind Name and keep it let target = if let Some(next) = self.accept(Name) { - next.lexeme + next.lexeme() } else { let unexpected = self.tokens.next().unwrap(); return Err(self.unexpected_token(&unexpected, &[Name])); @@ -375,15 +376,15 @@ impl<'a> Parser<'a> { } self.aliases.insert( - name.lexeme, + name.lexeme(), Alias { - name: name.lexeme, + name: name.lexeme(), line_number: name.line, - private: name.lexeme.starts_with('_'), + private: name.lexeme().starts_with('_'), target, }, ); - self.alias_tokens.insert(name.lexeme, name); + self.alias_tokens.insert(name.lexeme(), name); Ok(()) } @@ -416,7 +417,7 @@ impl<'a> Parser<'a> { } } Name => { - if token.lexeme == "export" { + if token.lexeme() == "export" { let next = self.tokens.next().unwrap(); if next.kind == Name && self.accepted(Equals) { self.assignment(next, true)?; @@ -426,7 +427,7 @@ impl<'a> Parser<'a> { self.recipe(&token, doc, false)?; doc = None; } - } else if token.lexeme == "alias" { + } else if token.lexeme() == "alias" { let next = self.tokens.next().unwrap(); if next.kind == Name && self.accepted(Equals) { self.alias(next)?; @@ -449,10 +450,10 @@ impl<'a> Parser<'a> { None => { return Err(CompilationError { text: self.text, - index: 0, + offset: 0, line: 0, column: 0, - 
width: None, + width: 0, kind: Internal { message: "unexpected end of token stream".to_string(), }, @@ -476,18 +477,18 @@ impl<'a> Parser<'a> { for recipe in self.recipes.values() { for parameter in &recipe.parameters { - if self.assignments.contains_key(parameter.token.lexeme) { + if self.assignments.contains_key(parameter.token.lexeme()) { return Err(parameter.token.error(ParameterShadowsVariable { - parameter: parameter.token.lexeme, + parameter: parameter.token.lexeme(), })); } } for dependency in &recipe.dependency_tokens { - if !self.recipes[dependency.lexeme].parameters.is_empty() { + if !self.recipes[dependency.lexeme()].parameters.is_empty() { return Err(dependency.error(DependencyHasParameters { recipe: recipe.name, - dependency: dependency.lexeme, + dependency: dependency.lexeme(), })); } } @@ -863,253 +864,261 @@ f y=(`echo hello` + x) +z=("foo" + "bar"):"#, "x = ('0')", } + #[rustfmt::skip] + summary_test! { + escaped_dos_newlines, + "@spam:\r +\t{ \\\r +\t\tfiglet test; \\\r +\t\tcargo build --color always 2>&1; \\\r +\t\tcargo test --color always -- --color always 2>&1; \\\r +\t} | less\r +", +"@spam: + { \\ + \tfiglet test; \\ + \tcargo build --color always 2>&1; \\ + \tcargo test --color always -- --color always 2>&1; \\ + } | less", + } + compilation_error_test! { name: duplicate_alias, input: "alias foo = bar\nalias foo = baz", - index: 22, + offset: 22, line: 1, column: 6, - width: Some(3), + width: 3, kind: DuplicateAlias { alias: "foo", first: 0 }, } compilation_error_test! { name: alias_syntax_multiple_rhs, input: "alias foo = bar baz", - index: 16, + offset: 16, line: 0, column: 16, - width: Some(3), + width: 3, kind: UnexpectedToken { expected: vec![Eol, Eof], found: Name }, } compilation_error_test! { name: alias_syntax_no_rhs, input: "alias foo = \n", - index: 12, + offset: 12, line: 0, column: 12, - width: Some(1), + width: 1, kind: UnexpectedToken {expected: vec![Name], found:Eol}, } compilation_error_test! 
{ name: unknown_alias_target, input: "alias foo = bar\n", - index: 6, + offset: 6, line: 0, column: 6, - width: Some(3), + width: 3, kind: UnknownAliasTarget {alias: "foo", target: "bar"}, } compilation_error_test! { name: alias_shadows_recipe_before, input: "bar: \n echo bar\nalias foo = bar\nfoo:\n echo foo", - index: 23, + offset: 23, line: 2, column: 6, - width: Some(3), + width: 3, kind: AliasShadowsRecipe {alias: "foo", recipe_line: 3}, } compilation_error_test! { name: alias_shadows_recipe_after, input: "foo:\n echo foo\nalias foo = bar\nbar:\n echo bar", - index: 22, + offset: 22, line: 2, column: 6, - width: Some(3), + width: 3, kind: AliasShadowsRecipe { alias: "foo", recipe_line: 0 }, } compilation_error_test! { name: missing_colon, input: "a b c\nd e f", - index: 5, + offset: 5, line: 0, column: 5, - width: Some(1), + width: 1, kind: UnexpectedToken{expected: vec![Name, Plus, Colon], found: Eol}, } compilation_error_test! { name: missing_default_eol, input: "hello arg=\n", - index: 10, + offset: 10, line: 0, column: 10, - width: Some(1), - kind: UnexpectedToken{expected: vec![Name, StringToken], found: Eol}, + width: 1, + kind: UnexpectedToken{expected: vec![Name, StringCooked], found: Eol}, } compilation_error_test! { name: missing_default_eof, input: "hello arg=", - index: 10, + offset: 10, line: 0, column: 10, - width: Some(0), - kind: UnexpectedToken{expected: vec![Name, StringToken], found: Eof}, + width: 0, + kind: UnexpectedToken{expected: vec![Name, StringCooked], found: Eof}, } compilation_error_test! { name: parameter_after_variadic, input: "foo +a bbb:", - index: 7, + offset: 7, line: 0, column: 7, - width: Some(3), + width: 3, kind: ParameterFollowsVariadicParameter{parameter: "bbb"}, } compilation_error_test! { name: required_after_default, input: "hello arg='foo' bar:", - index: 16, + offset: 16, line: 0, column: 16, - width: Some(3), + width: 3, kind: RequiredParameterFollowsDefaultParameter{parameter: "bar"}, } compilation_error_test! 
{ name: missing_eol, input: "a b c: z =", - index: 9, + offset: 9, line: 0, column: 9, - width: Some(1), + width: 1, kind: UnexpectedToken{expected: vec![Name, Eol, Eof], found: Equals}, } compilation_error_test! { name: duplicate_parameter, input: "a b b:", - index: 4, + offset: 4, line: 0, column: 4, - width: Some(1), + width: 1, kind: DuplicateParameter{recipe: "a", parameter: "b"}, } compilation_error_test! { name: parameter_shadows_varible, input: "foo = \"h\"\na foo:", - index: 12, + offset: 12, line: 1, column: 2, - width: Some(3), + width: 3, kind: ParameterShadowsVariable{parameter: "foo"}, } compilation_error_test! { name: dependency_has_parameters, input: "foo arg:\nb: foo", - index: 12, + offset: 12, line: 1, column: 3, - width: Some(3), + width: 3, kind: DependencyHasParameters{recipe: "b", dependency: "foo"}, } compilation_error_test! { name: duplicate_dependency, input: "a b c: b c z z", - index: 13, + offset: 13, line: 0, column: 13, - width: Some(1), + width: 1, kind: DuplicateDependency{recipe: "a", dependency: "z"}, } compilation_error_test! { name: duplicate_recipe, input: "a:\nb:\na:", - index: 6, + offset: 6, line: 2, column: 0, - width: Some(1), + width: 1, kind: DuplicateRecipe{recipe: "a", first: 0}, } compilation_error_test! { name: duplicate_variable, input: "a = \"0\"\na = \"0\"", - index: 8, + offset: 8, line: 1, column: 0, - width: Some(1), + width: 1, kind: DuplicateVariable{variable: "a"}, } compilation_error_test! { name: extra_whitespace, input: "a:\n blah\n blarg", - index: 10, + offset: 10, line: 2, column: 1, - width: Some(6), + width: 6, kind: ExtraLeadingWhitespace, } compilation_error_test! { name: interpolation_outside_of_recipe, input: "{{", - index: 0, + offset: 0, line: 0, column: 0, - width: Some(2), + width: 2, kind: UnexpectedToken{expected: vec![Name, At], found: InterpolationStart}, } - compilation_error_test! 
{ - name: unclosed_interpolation_delimiter, - input: "a:\n echo {{ foo", - index: 15, - line: 1, - column: 12, - width: Some(0), - kind: UnexpectedToken{expected: vec![Plus, InterpolationEnd], found: Dedent}, - } - compilation_error_test! { name: unclosed_parenthesis_in_expression, input: "x = foo(", - index: 8, + offset: 8, line: 0, column: 8, - width: Some(0), - kind: UnexpectedToken{expected: vec![Name, StringToken, ParenR], found: Eof}, + width: 0, + kind: UnexpectedToken{expected: vec![Name, StringCooked, ParenR], found: Eof}, } compilation_error_test! { name: unclosed_parenthesis_in_interpolation, input: "a:\n echo {{foo(}}", - index: 15, + offset: 15, line: 1, column: 12, - width: Some(2), - kind: UnexpectedToken{expected: vec![Name, StringToken, ParenR], found: InterpolationEnd}, + width: 2, + kind: UnexpectedToken{expected: vec![Name, StringCooked, ParenR], found: InterpolationEnd}, } compilation_error_test! { name: plus_following_parameter, input: "a b c+:", - index: 5, + offset: 5, line: 0, column: 5, - width: Some(1), + width: 1, kind: UnexpectedToken{expected: vec![Name], found: Plus}, } compilation_error_test! 
{ name: bad_export, input: "export a", - index: 8, + offset: 8, line: 0, column: 8, - width: Some(0), + width: 0, kind: UnexpectedToken{expected: vec![Name, Plus, Colon], found: Eof}, } diff --git a/src/position.rs b/src/position.rs new file mode 100644 index 0000000..caef3b7 --- /dev/null +++ b/src/position.rs @@ -0,0 +1,7 @@ +/// Source position +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct Position { + pub offset: usize, + pub column: usize, + pub line: usize, +} diff --git a/src/recipe.rs b/src/recipe.rs index 0daf830..1b8e07d 100644 --- a/src/recipe.rs +++ b/src/recipe.rs @@ -38,12 +38,6 @@ pub struct Recipe<'a> { pub shebang: bool, } -pub struct RecipeContext<'a> { - pub invocation_directory: &'a Result, - pub configuration: &'a Configuration<'a>, - pub scope: BTreeMap<&'a str, String>, -} - impl<'a> Recipe<'a> { pub fn argument_range(&self) -> RangeInclusive { self.min_arguments()..=self.max_arguments() @@ -319,7 +313,13 @@ impl<'a> Display for Recipe<'a> { if let Some(doc) = self.doc { writeln!(f, "# {}", doc)?; } - write!(f, "{}", self.name)?; + + if self.quiet { + write!(f, "@{}", self.name)?; + } else { + write!(f, "{}", self.name)?; + } + for parameter in &self.parameters { write!(f, " {}", parameter)?; } @@ -337,7 +337,7 @@ impl<'a> Display for Recipe<'a> { write!(f, " ")?; } match *piece { - Fragment::Text { ref text } => write!(f, "{}", text.lexeme)?, + Fragment::Text { ref text } => write!(f, "{}", text.lexeme())?, Fragment::Expression { ref expression, .. 
} => write!(f, "{{{{{}}}}}", expression)?, } } diff --git a/src/recipe_context.rs b/src/recipe_context.rs new file mode 100644 index 0000000..59aaeb1 --- /dev/null +++ b/src/recipe_context.rs @@ -0,0 +1,7 @@ +use crate::common::*; + +pub struct RecipeContext<'a> { + pub invocation_directory: &'a Result, + pub configuration: &'a Configuration<'a>, + pub scope: BTreeMap<&'a str, String>, +} diff --git a/src/recipe_resolver.rs b/src/recipe_resolver.rs index d65b5cd..5eafb47 100644 --- a/src/recipe_resolver.rs +++ b/src/recipe_resolver.rs @@ -72,12 +72,12 @@ impl<'a, 'b> RecipeResolver<'a, 'b> { fn resolve_function(&self, function: &Token, argc: usize) -> CompilationResult<'a, ()> { resolve_function(function, argc).map_err(|error| CompilationError { - index: error.index, + offset: error.offset, line: error.line, column: error.column, width: error.width, kind: UnknownFunction { - function: &self.text[error.index..error.index + error.width.unwrap()], + function: &self.text[error.offset..error.offset + error.width], }, text: self.text, }) @@ -88,18 +88,18 @@ impl<'a, 'b> RecipeResolver<'a, 'b> { variable: &Token, parameters: &[Parameter], ) -> CompilationResult<'a, ()> { - let name = variable.lexeme; + let name = variable.lexeme(); let undefined = !self.assignments.contains_key(name) && !parameters.iter().any(|p| p.name == name); if undefined { let error = variable.error(UndefinedVariable { variable: name }); return Err(CompilationError { - index: error.index, + offset: error.offset, line: error.line, column: error.column, width: error.width, kind: UndefinedVariable { - variable: &self.text[error.index..error.index + error.width.unwrap()], + variable: &self.text[error.offset..error.offset + error.width], }, text: self.text, }); @@ -115,7 +115,7 @@ impl<'a, 'b> RecipeResolver<'a, 'b> { self.stack.push(recipe.name); self.seen.insert(recipe.name); for dependency_token in &recipe.dependency_tokens { - match self.recipes.get(dependency_token.lexeme) { + match 
self.recipes.get(dependency_token.lexeme()) { Some(dependency) => { if !self.resolved.contains(dependency.name) { if self.seen.contains(dependency.name) { @@ -139,7 +139,7 @@ impl<'a, 'b> RecipeResolver<'a, 'b> { None => { return Err(dependency_token.error(UnknownDependency { recipe: recipe.name, - unknown: dependency_token.lexeme, + unknown: dependency_token.lexeme(), })); } } @@ -157,80 +157,80 @@ mod test { compilation_error_test! { name: circular_recipe_dependency, input: "a: b\nb: a", - index: 8, + offset: 8, line: 1, column: 3, - width: Some(1), + width: 1, kind: CircularRecipeDependency{recipe: "b", circle: vec!["a", "b", "a"]}, } compilation_error_test! { name: self_recipe_dependency, input: "a: a", - index: 3, + offset: 3, line: 0, column: 3, - width: Some(1), + width: 1, kind: CircularRecipeDependency{recipe: "a", circle: vec!["a", "a"]}, } compilation_error_test! { name: unknown_dependency, input: "a: b", - index: 3, + offset: 3, line: 0, column: 3, - width: Some(1), + width: 1, kind: UnknownDependency{recipe: "a", unknown: "b"}, } compilation_error_test! { name: unknown_interpolation_variable, input: "x:\n {{ hello}}", - index: 9, + offset: 9, line: 1, column: 6, - width: Some(5), + width: 5, kind: UndefinedVariable{variable: "hello"}, } compilation_error_test! { name: unknown_second_interpolation_variable, input: "wtf=\"x\"\nx:\n echo\n foo {{wtf}} {{ lol }}", - index: 33, + offset: 33, line: 3, column: 16, - width: Some(3), + width: 3, kind: UndefinedVariable{variable: "lol"}, } compilation_error_test! { name: unknown_function_in_interpolation, input: "a:\n echo {{bar()}}", - index: 11, + offset: 11, line: 1, column: 8, - width: Some(3), + width: 3, kind: UnknownFunction{function: "bar"}, } compilation_error_test! { name: unknown_function_in_default, input: "a f=baz():", - index: 4, + offset: 4, line: 0, column: 4, - width: Some(3), + width: 3, kind: UnknownFunction{function: "baz"}, } compilation_error_test! 
{ name: unknown_variable_in_default, input: "a f=foo:", - index: 4, + offset: 4, line: 0, column: 4, - width: Some(3), + width: 3, kind: UndefinedVariable{variable: "foo"}, } } diff --git a/src/run.rs b/src/run.rs index afa55bc..b510474 100644 --- a/src/run.rs +++ b/src/run.rs @@ -194,27 +194,33 @@ pub fn run() { } } - let override_re = Regex::new("^([^=]+)=(.*)$").unwrap(); + fn is_override(arg: &&str) -> bool { + arg.chars().skip(1).any(|c| c == '=') + } - let raw_arguments: Vec<_> = matches + let raw_arguments: Vec<&str> = matches .values_of("ARGUMENTS") .map(Iterator::collect) .unwrap_or_default(); - for argument in raw_arguments - .iter() - .take_while(|arg| override_re.is_match(arg)) - { - let captures = override_re.captures(argument).unwrap(); - overrides.insert( - captures.get(1).unwrap().as_str(), - captures.get(2).unwrap().as_str(), - ); + for argument in raw_arguments.iter().cloned().take_while(is_override) { + let i = argument + .char_indices() + .skip(1) + .filter(|&(_, c)| c == '=') + .next() + .unwrap() + .0; + + let name = &argument[..i]; + let value = &argument[i + 1..]; + + overrides.insert(name, value); } let rest = raw_arguments - .iter() - .skip_while(|arg| override_re.is_match(arg)) + .into_iter() + .skip_while(is_override) .enumerate() .flat_map(|(i, argument)| { if i == 0 { @@ -237,7 +243,7 @@ pub fn run() { } } - Some(*argument) + Some(argument) }) .collect::>(); diff --git a/src/runtime_error.rs b/src/runtime_error.rs index 277c638..f461c82 100644 --- a/src/runtime_error.rs +++ b/src/runtime_error.rs @@ -10,10 +10,10 @@ fn write_token_error_context(f: &mut Formatter, token: &Token) -> Result<(), fmt write_error_context( f, token.text, - token.index, + token.offset, token.line, - token.column + token.prefix.len(), - Some(token.lexeme.len()), + token.column, + token.lexeme().len(), ) } @@ -255,7 +255,12 @@ impl<'a> Display for RuntimeError<'a> { ref token, ref message, } => { - writeln!(f, "Call to function `{}` failed: {}", token.lexeme, 
message)?; + writeln!( + f, + "Call to function `{}` failed: {}", + token.lexeme(), + message + )?; error_token = Some(token); } Shebang { diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..37ffd6e --- /dev/null +++ b/src/state.rs @@ -0,0 +1,9 @@ +use crate::common::*; + +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum State<'a> { + Normal, + Indented { indentation: &'a str }, + Text, + Interpolation { interpolation_start: Position }, +} diff --git a/src/cooked_string.rs b/src/string_literal.rs similarity index 76% rename from src/cooked_string.rs rename to src/string_literal.rs index 279023f..11b4edb 100644 --- a/src/cooked_string.rs +++ b/src/string_literal.rs @@ -1,21 +1,21 @@ use crate::common::*; #[derive(PartialEq, Debug)] -pub struct CookedString<'a> { +pub struct StringLiteral<'a> { pub raw: &'a str, pub cooked: Cow<'a, str>, } -impl<'a> CookedString<'a> { - pub fn new(token: &Token<'a>) -> CompilationResult<'a, CookedString<'a>> { - let raw = &token.lexeme[1..token.lexeme.len() - 1]; +impl<'a> StringLiteral<'a> { + pub fn new(token: &Token<'a>) -> CompilationResult<'a, StringLiteral<'a>> { + let raw = &token.lexeme()[1..token.lexeme().len() - 1]; - if let TokenKind::RawString = token.kind { - Ok(CookedString { + if let TokenKind::StringRaw = token.kind { + Ok(StringLiteral { cooked: Cow::Borrowed(raw), raw, }) - } else if let TokenKind::StringToken = token.kind { + } else if let TokenKind::StringCooked = token.kind { let mut cooked = String::new(); let mut escape = false; for c in raw.chars() { @@ -41,7 +41,7 @@ impl<'a> CookedString<'a> { } cooked.push(c); } - Ok(CookedString { + Ok(StringLiteral { raw, cooked: Cow::Owned(cooked), }) @@ -53,7 +53,7 @@ impl<'a> CookedString<'a> { } } -impl<'a> Display for CookedString<'a> { +impl<'a> Display for StringLiteral<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self.cooked { Cow::Borrowed(raw) => write!(f, "'{}'", raw), diff --git a/src/summary.rs b/src/summary.rs 
index f8785a0..6cd60e1 100644 --- a/src/summary.rs +++ b/src/summary.rs @@ -138,7 +138,7 @@ impl Fragment { fn new(fragment: fragment::Fragment) -> Fragment { match fragment { fragment::Fragment::Text { text } => Fragment::Text { - text: text.lexeme.to_owned(), + text: text.lexeme().to_owned(), }, fragment::Fragment::Expression { expression } => Fragment::Expression { expression: Expression::new(expression), diff --git a/src/testing.rs b/src/testing.rs index 3790241..03c5a46 100644 --- a/src/testing.rs +++ b/src/testing.rs @@ -7,15 +7,47 @@ pub fn parse_success(text: &str) -> Justfile { } } +pub fn token_summary(tokens: &[Token]) -> String { + use TokenKind::*; + + tokens + .iter() + .map(|t| match t.kind { + At => "@", + Backtick => "`", + Colon => ":", + Comma => ",", + Comment => "#", + Dedent => "<", + Eof => ".", + Eol => "$", + Equals => "=", + Indent => ">", + InterpolationEnd => "}", + InterpolationStart => "{", + Line => "^", + Name => "N", + ParenL => "(", + ParenR => ")", + Plus => "+", + StringRaw => "'", + StringCooked => "\"", + Text => "_", + Whitespace => " ", + }) + .collect::>() + .join("") +} + macro_rules! compilation_error_test { ( - name: $name:ident, - input: $input:expr, - index: $index:expr, - line: $line:expr, - column: $column:expr, - width: $width:expr, - kind: $kind:expr, + name: $name:ident, + input: $input:expr, + offset: $offset:expr, + line: $line:expr, + column: $column:expr, + width: $width:expr, + kind: $kind:expr, ) => { #[test] fn $name() { @@ -23,19 +55,22 @@ macro_rules! 
compilation_error_test { let expected = crate::compilation_error::CompilationError { text: input, - index: $index, + offset: $offset, line: $line, column: $column, width: $width, kind: $kind, }; - let tokens = crate::lexer::Lexer::lex(input).unwrap(); + let mut tokens = Lexer::lex(input).unwrap(); + + tokens.retain(|token| token.kind != TokenKind::Whitespace); + let parser = crate::parser::Parser::new(input, tokens); if let Err(error) = parser.justfile() { assert_eq!(error.text, expected.text); - assert_eq!(error.index, expected.index); + assert_eq!(error.offset, expected.offset); assert_eq!(error.line, expected.line); assert_eq!(error.column, expected.column); assert_eq!(error.width, expected.width); diff --git a/src/token.rs b/src/token.rs index 268edb0..fbb352c 100644 --- a/src/token.rs +++ b/src/token.rs @@ -2,80 +2,27 @@ use crate::common::*; #[derive(Debug, PartialEq, Clone)] pub struct Token<'a> { - pub index: usize, + pub offset: usize, + pub length: usize, pub line: usize, pub column: usize, pub text: &'a str, - pub prefix: &'a str, - pub lexeme: &'a str, pub kind: TokenKind, } impl<'a> Token<'a> { + pub fn lexeme(&self) -> &'a str { + &self.text[self.offset..self.offset + self.length] + } + pub fn error(&self, kind: CompilationErrorKind<'a>) -> CompilationError<'a> { CompilationError { - column: self.column + self.prefix.len(), - index: self.index + self.prefix.len(), + column: self.column, + offset: self.offset, line: self.line, text: self.text, - width: Some(self.lexeme.len()), + width: self.length, kind, } } } - -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum TokenKind { - At, - Backtick, - Colon, - Comma, - Comment, - Dedent, - Eof, - Eol, - Equals, - Indent, - InterpolationEnd, - InterpolationStart, - Line, - Name, - ParenL, - ParenR, - Plus, - RawString, - StringToken, - Text, -} - -impl Display for TokenKind { - fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { - use TokenKind::*; - write!( - f, - "{}", - match *self { - Backtick => 
"backtick", - Colon => "':'", - Comma => "','", - Comment => "comment", - Dedent => "dedent", - Eof => "end of file", - Eol => "end of line", - Equals => "'='", - Indent => "indent", - InterpolationEnd => "'}}'", - InterpolationStart => "'{{'", - Line => "command", - Name => "name", - Plus => "'+'", - At => "'@'", - ParenL => "'('", - ParenR => "')'", - StringToken => "string", - RawString => "raw string", - Text => "command text", - } - ) - } -} diff --git a/src/token_kind.rs b/src/token_kind.rs new file mode 100644 index 0000000..23cda3d --- /dev/null +++ b/src/token_kind.rs @@ -0,0 +1,59 @@ +use crate::common::*; + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum TokenKind { + At, + Backtick, + Colon, + Comma, + Comment, + Dedent, + Eof, + Eol, + Equals, + Indent, + InterpolationEnd, + InterpolationStart, + Line, + Name, + ParenL, + ParenR, + Plus, + StringRaw, + StringCooked, + Text, + Whitespace, +} + +impl Display for TokenKind { + fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { + use TokenKind::*; + write!( + f, + "{}", + match *self { + At => "'@'", + Backtick => "backtick", + Colon => "':'", + Comma => "','", + Comment => "comment", + Dedent => "dedent", + Eof => "end of file", + Eol => "end of line", + Equals => "'='", + Indent => "indent", + InterpolationEnd => "'}}'", + InterpolationStart => "'{{'", + Line => "command", + Name => "name", + ParenL => "'('", + ParenR => "')'", + Plus => "'+'", + StringRaw => "raw string", + StringCooked => "cooked string", + Text => "command text", + Whitespace => "whitespace", + } + ) + } +} diff --git a/src/use_color.rs b/src/use_color.rs new file mode 100644 index 0000000..152b073 --- /dev/null +++ b/src/use_color.rs @@ -0,0 +1,6 @@ +#[derive(Copy, Clone)] +pub enum UseColor { + Auto, + Always, + Never, +} diff --git a/src/variables.rs b/src/variables.rs new file mode 100644 index 0000000..9c664c8 --- /dev/null +++ b/src/variables.rs @@ -0,0 +1,34 @@ +use crate::common::*; + +pub struct Variables<'a> { 
+ stack: Vec<&'a Expression<'a>>, +} + +impl<'a> Variables<'a> { + pub fn new(root: &'a Expression<'a>) -> Variables<'a> { + Variables { stack: vec![root] } + } +} + +impl<'a> Iterator for Variables<'a> { + type Item = &'a Token<'a>; + + fn next(&mut self) -> Option<&'a Token<'a>> { + match self.stack.pop() { + None + | Some(Expression::String { .. }) + | Some(Expression::Backtick { .. }) + | Some(Expression::Call { .. }) => None, + Some(Expression::Variable { token, .. }) => Some(token), + Some(Expression::Concatination { lhs, rhs }) => { + self.stack.push(lhs); + self.stack.push(rhs); + self.next() + } + Some(Expression::Group { expression }) => { + self.stack.push(expression); + self.next() + } + } + } +} diff --git a/tests/integration.rs b/tests/integration.rs index 40812ff..a346d85 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -431,6 +431,20 @@ integration_test! { status: 200, } +// 😬鎌 +integration_test! { + name: backtick_code_interpolation_mod, + justfile: "f:\n 無{{`exit 200`}}", + args: (), + stdout: "", + stderr: "error: Backtick failed with exit code 200 + | +2 | 無{{`exit 200`}} + | ^^^^^^^^^^ +", + status: 200, +} + integration_test! { name: backtick_code_interpolation_tab, justfile: " @@ -1029,7 +1043,7 @@ integration_test! { Leading whitespace may consist of tabs or spaces, but not both | 2 | echo hello - | ^ + | ^^^^^ ", status: EXIT_FAILURE, } @@ -1056,7 +1070,7 @@ integration_test! { Recipe started with `␉␉` but found line with `␉␠` | 3 | echo goodbye - | ^ + | ^^^^^ ", status: EXIT_FAILURE, } @@ -1725,14 +1739,14 @@ a: integration_test! { name: unterminated_raw_string, justfile: " -a b=': +a b= ': ", args: ("a"), stdout: "", stderr: "error: Unterminated string | -2 | a b=': - | ^ +2 | a b= ': + | ^ ", status: EXIT_FAILURE, } @@ -1740,14 +1754,14 @@ a b=': integration_test! 
{ name: unterminated_string, justfile: r#" -a b=": +a b= ": "#, args: ("a"), stdout: "", stderr: r#"error: Unterminated string | -2 | a b=": - | ^ +2 | a b= ": + | ^ "#, status: EXIT_FAILURE, } @@ -2051,3 +2065,64 @@ foo a=arch() o=os() f=os_family(): stderr: format!("echo {} {} {}\n", target::arch(), target::os(), target::os_family()).as_str(), status: EXIT_SUCCESS, } + +integration_test! { + name: unterminated_interpolation_eol, + justfile: " +foo: + echo {{ +", + args: (), + stdout: "", + stderr: r#"error: Unterminated interpolation + | +3 | echo {{ + | ^^ +"#, + status: EXIT_FAILURE, +} + +integration_test! { + name: unterminated_interpolation_eof, + justfile: " +foo: + echo {{", + args: (), + stdout: "", + stderr: r#"error: Unterminated interpolation + | +3 | echo {{ + | ^^ +"#, + status: EXIT_FAILURE, +} + +integration_test! { + name: unterminated_backtick, + justfile: " +foo a=\t`echo blaaaaaah: + echo {{a}}", + args: (), + stdout: "", + stderr: r#"error: Unterminated backtick + | +2 | foo a= `echo blaaaaaah: + | ^ +"#, + status: EXIT_FAILURE, +} + +integration_test! 
{ + name: unknown_start_of_token, + justfile: " +assembly_source_files = $(wildcard src/arch/$(arch)/*.s) +", + args: (), + stdout: "", + stderr: r#"error: Unknown start of token: + | +2 | assembly_source_files = $(wildcard src/arch/$(arch)/*.s) + | ^ +"#, + status: EXIT_FAILURE, +} diff --git a/tests/interrupts.rs b/tests/interrupts.rs index 13a5294..06050fe 100644 --- a/tests/interrupts.rs +++ b/tests/interrupts.rs @@ -52,6 +52,7 @@ mod unix { } #[test] + #[ignore] fn interrupt_shebang() { interrupt_test( " @@ -63,6 +64,7 @@ default: } #[test] + #[ignore] fn interrupt_line() { interrupt_test( " diff --git a/variables.rs b/variables.rs new file mode 100644 index 0000000..678da00 --- /dev/null +++ b/variables.rs @@ -0,0 +1,28 @@ +use crate::common::*; + +pub struct Variables<'a> { + stack: Vec<&'a Expression<'a>>, +} + +impl<'a> Iterator for Variables<'a> { + type Item = &'a Token<'a>; + + fn next(&mut self) -> Option<&'a Token<'a>> { + match self.stack.pop() { + None + | Some(Expression::String { .. }) + | Some(Expression::Backtick { .. }) + | Some(Expression::Call { .. }) => None, + Some(Expression::Variable { token, .. }) => Some(token), + Some(Expression::Concatination { lhs, rhs }) => { + self.stack.push(lhs); + self.stack.push(rhs); + self.next() + } + Some(Expression::Group { expression }) => { + self.stack.push(expression); + self.next() + } + } + } +}