Starting to rewrite the parser

This commit is contained in:
Casey Rodarmor 2016-10-22 23:18:26 -07:00
parent 3b92e00ee7
commit 913bcba5f7
5 changed files with 351 additions and 529 deletions

7
Cargo.lock generated
View File

@ -3,6 +3,7 @@ name = "j"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
"tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -49,6 +50,11 @@ dependencies = [
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "lazy_static"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.16" version = "0.2.16"
@ -163,6 +169,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" "checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
"checksum clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5fa304b03c49ccbb005784fc26e985b5d2310b1d37f2c311ce90dbcd18ea5fde" "checksum clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5fa304b03c49ccbb005784fc26e985b5d2310b1d37f2c311ce90dbcd18ea5fde"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"

View File

@ -11,3 +11,4 @@ homepage = "https://github.com/casey/j"
regex = "^0.1.77" regex = "^0.1.77"
clap = "^2.0.0" clap = "^2.0.0"
tempdir = "^0.3.5" tempdir = "^0.3.5"
lazy_static = "^0.2.1"

20
notes
View File

@ -5,11 +5,31 @@ notes
- parse lines into fragments and store in recipe - parse lines into fragments and store in recipe
- positional error messages - positional error messages
- use clippy - use clippy
- document everything, including internal stuff
- spam in rust irc chat when done
- use "kind" instead of class
- should i use // comments, since that's what's used in rust?
- vim and emacs syntax highlighting
- gah, maybe I should change it back to 'just'
. makes more sense as a name
. suggest j as alias
. should see if people are using 'j'
. doesn't conflict with autojump
- allow calling recipes in a justfile in a different - allow calling recipes in a justfile in a different
directory: directory:
- ../foo # ../justfile:foo - ../foo # ../justfile:foo
- xyz/foo # xyz/justfile:foo - xyz/foo # xyz/justfile:foo
- #![deny(missing_docs)] - #![deny(missing_docs)]
// error on tab after space
// error on mixed leading whitespace
// error on inconsistent leading whitespace
// outer shebang
// strict recipe name checking, be lenient in tokenizing
// but strict in parsing
// duplicate recipe name error
// duplicate dependency error
// differentiate shebang and non-shebang recipe
// resolve each recipe after parsing
j: j:
- vector of substitutions - vector of substitutions

View File

@ -1,6 +1,8 @@
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
#[macro_use]
extern crate lazy_static;
extern crate regex; extern crate regex;
extern crate tempdir; extern crate tempdir;
@ -48,7 +50,7 @@ pub struct Recipe<'a> {
line_number: usize, line_number: usize,
label: &'a str, label: &'a str,
name: &'a str, name: &'a str,
leading_whitespace: &'a str, // leading_whitespace: &'a str,
lines: Vec<&'a str>, lines: Vec<&'a str>,
// fragments: Vec<Vec<Fragment<'a>>>, // fragments: Vec<Vec<Fragment<'a>>>,
// variables: BTreeSet<&'a str>, // variables: BTreeSet<&'a str>,
@ -181,6 +183,7 @@ impl<'a> Recipe<'a> {
} }
} }
/*
fn resolve<'a>( fn resolve<'a>(
text: &'a str, text: &'a str,
recipes: &BTreeMap<&str, Recipe<'a>>, recipes: &BTreeMap<&str, Recipe<'a>>,
@ -218,51 +221,58 @@ fn resolve<'a>(
stack.pop(); stack.pop();
Ok(()) Ok(())
} }
*/
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub struct Error<'a> { pub struct Error<'a> {
text: &'a str, text: &'a str,
index: usize,
line: usize, line: usize,
kind: ErrorKind<'a> column: usize,
kind: ErrorKind<'a>,
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
enum ErrorKind<'a> { enum ErrorKind<'a> {
BadRecipeName{name: &'a str}, // BadRecipeName{name: &'a str},
CircularDependency{circle: Vec<&'a str>}, // CircularDependency{circle: Vec<&'a str>},
DuplicateDependency{name: &'a str}, // DuplicateDependency{name: &'a str},
DuplicateArgument{recipe: &'a str, argument: &'a str}, // DuplicateArgument{recipe: &'a str, argument: &'a str},
DuplicateRecipe{first: usize, name: &'a str}, // DuplicateRecipe{first: usize, name: &'a str},
TabAfterSpace{whitespace: &'a str}, // TabAfterSpace{whitespace: &'a str},
MixedLeadingWhitespace{whitespace: &'a str}, // MixedLeadingWhitespace{whitespace: &'a str},
ExtraLeadingWhitespace, // ExtraLeadingWhitespace,
InconsistentLeadingWhitespace{expected: &'a str, found: &'a str}, InconsistentLeadingWhitespace{expected: &'a str, found: &'a str},
OuterShebang, OuterShebang,
NonLeadingShebang{recipe: &'a str}, // NonLeadingShebang{recipe: &'a str},
UnknownDependency{name: &'a str, unknown: &'a str}, // UnknownDependency{name: &'a str, unknown: &'a str},
Unparsable, // Unparsable,
UnparsableDependencies, // UnparsableDependencies,
UnknownStartOfToken, UnknownStartOfToken,
InternalError{message: String},
} }
fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>) // fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>)
-> Error<'a> // -> Error<'a>
{ // {
Error { // Error {
text: text, // text: text,
line: line, // line: line,
kind: kind, // kind: kind,
} // }
} // }
fn show_whitespace(text: &str) -> String { fn show_whitespace(text: &str) -> String {
text.chars().map(|c| match c { '\t' => 't', ' ' => 's', _ => c }).collect() text.chars().map(|c| match c { '\t' => 't', ' ' => 's', _ => c }).collect()
} }
/*
fn mixed(text: &str) -> bool { fn mixed(text: &str) -> bool {
!(text.chars().all(|c| c == ' ') || text.chars().all(|c| c == '\t')) !(text.chars().all(|c| c == ' ') || text.chars().all(|c| c == '\t'))
} }
*/
/*
fn tab_after_space(text: &str) -> bool { fn tab_after_space(text: &str) -> bool {
let mut space = false; let mut space = false;
for c in text.chars() { for c in text.chars() {
@ -276,66 +286,70 @@ fn tab_after_space(text: &str) -> bool {
} }
return false; return false;
} }
*/
impl<'a> Display for Error<'a> { impl<'a> Display for Error<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
try!(write!(f, "justfile:{}: ", self.line)); try!(write!(f, "justfile:{}: ", self.line));
match self.kind { match self.kind {
ErrorKind::BadRecipeName{name} => { // ErrorKind::BadRecipeName{name} => {
try!(writeln!(f, "recipe name does not match /[a-z](-[a-z]|[a-z])*/: {}", name)); // try!(writeln!(f, "recipe name does not match /[a-z](-[a-z]|[a-z])*/: {}", name));
} // }
ErrorKind::CircularDependency{ref circle} => { // ErrorKind::CircularDependency{ref circle} => {
try!(write!(f, "circular dependency: {}", circle.join(" -> "))); // try!(write!(f, "circular dependency: {}", circle.join(" -> ")));
return Ok(()); // return Ok(());
} // }
ErrorKind::DuplicateArgument{recipe, argument} => { // ErrorKind::DuplicateArgument{recipe, argument} => {
try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument)); // try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument));
} //}
ErrorKind::DuplicateDependency{name} => { // ErrorKind::DuplicateDependency{name} => {
try!(writeln!(f, "duplicate dependency: {}", name)); // try!(writeln!(f, "duplicate dependency: {}", name));
} // }
ErrorKind::DuplicateRecipe{first, name} => { // ErrorKind::DuplicateRecipe{first, name} => {
try!(write!(f, "duplicate recipe: {} appears on lines {} and {}", // try!(write!(f, "duplicate recipe: {} appears on lines {} and {}",
name, first, self.line)); // name, first, self.line));
return Ok(()); // return Ok(());
} // }
ErrorKind::TabAfterSpace{whitespace} => { // ErrorKind::TabAfterSpace{whitespace} => {
try!(writeln!(f, "found tab after space: {}", show_whitespace(whitespace))); // try!(writeln!(f, "found tab after space: {}", show_whitespace(whitespace)));
} // }
ErrorKind::MixedLeadingWhitespace{whitespace} => { // ErrorKind::MixedLeadingWhitespace{whitespace} => {
try!(writeln!(f, // try!(writeln!(f,
"inconsistant leading whitespace: recipe started with {}:", // "inconsistant leading whitespace: recipe started with {}:",
show_whitespace(whitespace) // show_whitespace(whitespace)
)); // ));
} // }
ErrorKind::ExtraLeadingWhitespace => { // ErrorKind::ExtraLeadingWhitespace => {
try!(writeln!(f, "line has extra leading whitespace")); // try!(writeln!(f, "line has extra leading whitespace"));
} // }
ErrorKind::InconsistentLeadingWhitespace{expected, found} => { ErrorKind::InconsistentLeadingWhitespace{expected, found} => {
try!(writeln!(f, try!(writeln!(f,
"inconsistant leading whitespace: recipe started with {} but found line with {}:", "inconsistant leading whitespace: recipe started with \"{}\" but found line with \"{}\":",
show_whitespace(expected), show_whitespace(found) show_whitespace(expected), show_whitespace(found)
)); ));
} }
ErrorKind::OuterShebang => { ErrorKind::OuterShebang => {
try!(writeln!(f, "a shebang \"#!\" is reserved syntax outside of recipes")) try!(writeln!(f, "a shebang \"#!\" is reserved syntax outside of recipes"))
} }
ErrorKind::NonLeadingShebang{..} => { // ErrorKind::NonLeadingShebang{..} => {
try!(writeln!(f, "a shebang \"#!\" may only appear on the first line of a recipe")) // try!(writeln!(f, "a shebang \"#!\" may only appear on the first line of a recipe"))
} //}
ErrorKind::UnknownDependency{name, unknown} => { // ErrorKind::UnknownDependency{name, unknown} => {
try!(writeln!(f, "recipe {} has unknown dependency {}", name, unknown)); // try!(writeln!(f, "recipe {} has unknown dependency {}", name, unknown));
} // }
ErrorKind::Unparsable => { // ErrorKind::Unparsable => {
try!(writeln!(f, "could not parse line:")); // try!(writeln!(f, "could not parse line:"));
} // }
ErrorKind::UnparsableDependencies => { // ErrorKind::UnparsableDependencies => {
try!(writeln!(f, "could not parse dependencies:")); // try!(writeln!(f, "could not parse dependencies:"));
} // }
ErrorKind::UnknownStartOfToken => { ErrorKind::UnknownStartOfToken => {
try!(writeln!(f, "uknown start of token:")); try!(writeln!(f, "uknown start of token:"));
} }
ErrorKind::InternalError{ref message} => {
try!(writeln!(f, "internal error, this may indicate a bug in j: {}\n consider filing an issue: https://github.com/casey/j/issues/new", message));
}
} }
match self.text.lines().nth(self.line) { match self.text.lines().nth(self.line) {
@ -454,14 +468,26 @@ impl<'a> Display for RunError<'a> {
} }
struct Token<'a> { struct Token<'a> {
// index: usize, index: usize,
line: usize, line: usize,
// col: usize, column: usize,
prefix: &'a str, prefix: &'a str,
lexeme: &'a str, lexeme: &'a str,
class: TokenClass, class: TokenClass,
} }
impl<'a> Token<'a> {
    /// Builds an `Error` of the given `kind` positioned at this token.
    ///
    /// The returned error borrows `text` and copies this token's `index`,
    /// `line` and `column`, so the failure is reported where the token sits.
    fn error(&self, text: &'a str, kind: ErrorKind<'a>) -> Error<'a> {
        // Copy the position out of the token first, then assemble the error.
        let (index, line, column) = (self.index, self.line, self.column);
        Error {
            kind: kind,
            text: text,
            index: index,
            line: line,
            column: column,
        }
    }
}
#[derive(Debug, PartialEq, Clone, Copy)] #[derive(Debug, PartialEq, Clone, Copy)]
enum TokenClass { enum TokenClass {
Name, Name,
@ -486,86 +512,78 @@ fn token(pattern: &str) -> Regex {
} }
fn tokenize(text: &str) -> Result<Vec<Token>, Error> { fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
let name_re = token(r"[a-z]((_|-)?[a-z0-9])*"); lazy_static! {
let colon_re = token(r":" ); static ref EOF: Regex = token(r"(?-m)$" );
let equals_re = token(r"=" ); static ref NAME: Regex = token(r"[a-z]((_|-)?[a-z0-9])*");
let comment_re = token(r"#([^!].*)?$" ); static ref COLON: Regex = token(r":" );
//let shebang_re = token(r"#!" ); static ref EQUALS: Regex = token(r"=" );
let eol_re = token(r"\n|\r\n" ); static ref COMMENT: Regex = token(r"#([^!].*)?$" );
let eof_re = token(r"(?-m)$" ); static ref EOL: Regex = token(r"\n|\r\n" );
//let line_re = token(r"[^\n\r]" ); static ref LINE: Regex = re(r"^(?m)[ \t]+[^ \t\n\r].*$");
static ref INDENT: Regex = re(r"^([ \t]*)[^ \t\n\r]" );
//let split_re = re("(?m)$");
//let body_re = re(r"^(?ms)(.*?$)\s*(^[^ \t\r\n]|(?-m:$))");
// let dedent_re = re(r"^(?m)\s*(^[^\s]|(?-m:$))");
let line_re = re(r"^(?m)[ \t]+[^ \t\n\r].*$");
/*
#[derive(PartialEq)]
enum State<'a> {
Normal, // starting state
Colon, // we have seen a colon since the last eol
Recipe, // we are on the line after a colon
Body{indent: &'a str}, // we are in a recipe body
} }
*/
// state is:
// beginning of line or not
// current indent
fn indentation(text: &str) -> Option<&str> { fn indentation(text: &str) -> Option<&str> {
// fix this so it isn't recompiled every time INDENT.captures(text).map(|captures| captures.at(1).unwrap())
let indent_re = re(r"^([ \t]*)[^ \t\n\r]");
indent_re.captures(text).map(|captures| captures.at(1).unwrap())
} }
let mut tokens = vec![]; let mut tokens = vec![];
let mut rest = text; let mut rest = text;
// let mut index = 0; let mut index = 0;
let mut line = 0; let mut line = 0;
let mut col = 0; let mut column = 0;
let mut indent: Option<&str> = None; let mut indent: Option<&str> = None;
// let mut line = 0;
// let mut col = 0; macro_rules! error {
// let mut state = State::Normal; ($kind:expr) => {{
// let mut line_start = true; Err(Error {
loop { text: text,
if col == 0 { index: index,
if let Some(class) = match (indent, indentation(rest)) { line: line,
// dedent column: column,
(Some(_), Some("")) => { kind: $kind,
indent = None; })
Some(Dedent) }};
} }
loop {
if column == 0 {
if let Some(class) = match (indent, indentation(rest)) {
// ignore: was no indentation and there still isn't
(None, Some("")) => { (None, Some("")) => {
None None
} }
// indent // ignore: current line is blank
(_, None) => {
None
}
// indent: was no indentation, now there is
(None, Some(current @ _)) => { (None, Some(current @ _)) => {
// check mixed leading whitespace // check mixed leading whitespace
indent = Some(current); indent = Some(current);
Some(Indent) Some(Indent)
} }
(Some(previous), Some(current @ _)) => { // dedent: there was indentation and now there isn't
(Some(_), Some("")) => {
indent = None;
Some(Dedent)
}
// was indentation and still is, check if the new indentation matches
(Some(previous), Some(current)) => {
if !current.starts_with(previous) { if !current.starts_with(previous) {
return Err(error(text, line, return error!(ErrorKind::InconsistentLeadingWhitespace{
ErrorKind::InconsistentLeadingWhitespace{expected: previous, found: current} expected: previous,
)); found: current
});
} }
None None
// check tabs after spaces // check tabs after spaces
} }
// ignore
_ => {
None
}
} { } {
tokens.push(Token { tokens.push(Token {
// index: index, index: index,
line: line, line: line,
// col: col, column: column,
prefix: "", prefix: "",
lexeme: "", lexeme: "",
class: class, class: class,
@ -574,159 +592,39 @@ fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
} }
let (prefix, lexeme, class) = let (prefix, lexeme, class) =
if let (0, Some(indent), Some(captures)) = (col, indent, line_re.captures(rest)) { if let (0, Some(indent), Some(captures)) = (column, indent, LINE.captures(rest)) {
let line = captures.at(0).unwrap(); let line = captures.at(0).unwrap();
if !line.starts_with(indent) { if !line.starts_with(indent) {
panic!("Line did not start with expected indentation"); return error!(ErrorKind::InternalError{message: "unexpected indent".to_string()});
} }
let (prefix, lexeme) = line.split_at(indent.len()); let (prefix, lexeme) = line.split_at(indent.len());
(prefix, lexeme, Line) (prefix, lexeme, Line)
} else if let Some(captures) = name_re.captures(rest) { } else if let Some(captures) = NAME.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Name) (captures.at(1).unwrap(), captures.at(2).unwrap(), Name)
} else if let Some(captures) = eol_re.captures(rest) { } else if let Some(captures) = EOL.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Eol) (captures.at(1).unwrap(), captures.at(2).unwrap(), Eol)
} else if let Some(captures) = eof_re.captures(rest) { } else if let Some(captures) = EOF.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Eof) (captures.at(1).unwrap(), captures.at(2).unwrap(), Eof)
} else if let Some(captures) = colon_re.captures(rest) { } else if let Some(captures) = COLON.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Colon) (captures.at(1).unwrap(), captures.at(2).unwrap(), Colon)
} else if let Some(captures) = equals_re.captures(rest) { } else if let Some(captures) = EQUALS.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Equals) (captures.at(1).unwrap(), captures.at(2).unwrap(), Equals)
} else if let Some(captures) = comment_re.captures(rest) { } else if let Some(captures) = COMMENT.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Comment) (captures.at(1).unwrap(), captures.at(2).unwrap(), Comment)
} else { } else {
return Err(if rest.starts_with("#!") { return if rest.starts_with("#!") {
error(text, line, ErrorKind::OuterShebang) error!(ErrorKind::OuterShebang)
} else { } else {
error(text, line, ErrorKind::UnknownStartOfToken) error!(ErrorKind::UnknownStartOfToken)
});
}; };
// let (captures, class) = if let (0, Some(captures)) = line_re.captures(rest) {
/*
*/
/*
if state == State::Recipe {
let captures = indent_re.captures(rest).unwrap();
let indent = captures.at(1).unwrap();
let text = captures.at(2).unwrap();
if indent != "" && text != "" {
tokens.push(Token {
index: index,
prefix: "",
lexeme: "",
class: TokenClass::Indent,
});
state = State::Body{indent: indent};
} else {
state = State::Normal;
}
}
*/
/*
State::Body{indent: _} => {
if let Some(captures) = body_re.captures(rest) {
let body_text = captures.at(1).unwrap();
for mut line in split_re.split(body_text) {
if let Some(captures) = line_re.captures(line) {
let len = captures.at(0).unwrap().len();
tokens.push(Token {
index: index,
prefix: captures.at(1).unwrap(),
lexeme: captures.at(2).unwrap(),
class: TokenClass::Eol,
});
line = &line[len..];
}
println!("{:?}", line);
}
panic!("matched body: {}", captures.at(1).unwrap());
// split the body into lines
// for each line in the body, push a line if nonblank, then an eol
// push a dedent
}
},
*/
// State::Normal | State::Colon | State::Body{..} => {
/*
let (captures, class) = if let Some(captures) = eol_re.captures(rest) {
(captures, TokenClass::Eol)
} else if let State::Body{indent} = state {
if dedent_re.is_match(rest) {
tokens.push(Token {
index: index,
prefix: "",
lexeme: "",
class: TokenClass::Dedent,
});
state = State::Normal;
continue
}
if let Some(captures) = line_re.captures(rest) {
(captures, TokenClass::Line)
} else {
panic!("Failed to match a line");
}
} else if let Some(captures) = anchor_re.captures(rest) {
(captures, TokenClass::Anchor)
} else if let Some(captures) = name_re.captures(rest) {
(captures, TokenClass::Name)
} else if let Some(captures) = colon_re.captures(rest) {
(captures, TokenClass::Colon)
} else if let Some(captures) = comment_re.captures(rest) {
let text = captures.at(3).unwrap_or("");
(captures, TokenClass::Comment{text: text})
} else if let Some(captures) = eof_re.captures(rest) {
(captures, TokenClass::Eof)
} else {
panic!("Did not match a token! Rest: {}", rest);
}; };
*/
// let (captures, class) = if let (true, Some(captures)) = (line_start,
// let all = captures.at(0).unwrap();
// let prefix = captures.at(1).unwrap();
// let lexeme = captures.at(2).unwrap();
// let len = all.len();
// let eof = class == TokenClass::Eof;
//assert!(eof || lexeme.len() > 0);
//assert!(all.len() > 0);
//assert!(prefix.len() + lexeme.len() == len);
/*
if class == TokenClass::Colon {
state = State::Colon;
} else if class == TokenClass::Eol && state == State::Colon {
state = State::Recipe;
}
*/
/*
if class == TokenClass::Eol {
row += 1;
col = 0;
} else {
col += len;
}
let eof = TokenClass::Eof {
}
*/
let len = prefix.len() + lexeme.len(); let len = prefix.len() + lexeme.len();
tokens.push(Token { tokens.push(Token {
// index: index, index: index,
line: line, line: line,
// col: col, column: column,
prefix: prefix, prefix: prefix,
lexeme: lexeme, lexeme: lexeme,
class: class, class: class,
@ -735,115 +633,44 @@ fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
match tokens.last().unwrap().class { match tokens.last().unwrap().class {
Eol => { Eol => {
line += 1; line += 1;
col = 0; column = 0;
}, },
Eof => { Eof => {
break; break;
}, },
_ => { _ => {
col += len; column += len;
} }
} }
rest = &rest[len..]; rest = &rest[len..];
// index += len; index += len;
} }
Ok(tokens) Ok(tokens)
} }
/// Parses `text` into a `Justfile`.
///
/// The text is tokenized, `Comment` tokens are discarded, and the remaining
/// tokens are handed to a `Parser` whose `file` method produces the result.
/// Any error from `tokenize` or from `Parser::file` is returned to the caller.
pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
    // Comments carry no meaning for the parser, so drop them up front.
    let significant = try!(tokenize(text))
        .into_iter()
        .filter(|token| token.class != Comment)
        .collect::<Vec<_>>();

    Parser {
        text: text,
        tokens: significant.into_iter().peekable(),
    }.file()
}
/// Parser state: the text being parsed plus a peekable stream of the tokens
/// produced from it.
struct Parser<'a> {
/// Text the tokens were produced from; passed along when building errors.
text: &'a str,
/// Owned token stream; `Peekable` allows looking at the next token without
/// consuming it.
tokens: std::iter::Peekable<std::vec::IntoIter<Token<'a>>>
}
impl<'a> Parser<'a> {
/* /*
struct Parser<'a, I> { fn accept(&mut self, class: TokenClass) -> Option<Token<'a>> {
tokens: Vec<Token<'a>>, if self.peek(class) {
index: usize, self.tokens.next()
}
*/
//impl<'a> Parser<'a> {
/*
fn peek(&mut self) -> TokenClass {
self.tokens[self.index].class
}
fn advance(&mut self) {
self.index += 1;
}
fn accept_eol(&mut self) -> bool {
if self.accept(TokenClass::Comment) {
self.expect(TokenClass::Eol);
true
} else
}
*/
/*
fn accept(&mut self, class: TokenClass) -> bool {
if self.tokens[self.index].class == class {
self.index += 1;
true
} else {
false
}
}
*/
/*
fn peek(&mut self) -> Option<TokenClass> {
self.tokens.get(self.index).map(|t| t.class)
}
fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
let recipes = BTreeMap::new();
loop {
let ref current = self.tokens[self.index];
self.index += 1;
match current.class {
TokenClass::Eof => break,
TokenClass::Comment => continue,
TokenClass::Eol => continue,
TokenClass::Name => {
match self.peek() {
Some(TokenClass::Name) | Some(TokenClass::Colon) => {
panic!("time to parse a recipe");
}
Some(TokenClass::Equals) => {
panic!("time to parse an assignment");
}
Some(unexpected @ _) => {
panic!("unexpected token");
}
None => {
panic!("unexpected end of token stream");
}
}
}
unexpected @ _ => {
panic!("unexpected token at top level");
}
}
}
Ok(Justfile{recipes: recipes})
}
}
*/
// struct Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
// tokens: std::iter::Peekable<I>,
// }
struct Parser<'i, 't: 'i> {
text: &'t str,
tokens: &'i mut std::iter::Peekable<std::slice::Iter<'i, Token<'t>>>
}
impl<'i, 't> Parser<'i, 't> {
fn accept(&mut self, class: TokenClass) -> Option<&Token<'t>> {
if self.tokens.peek().unwrap().class == class {
Some(self.tokens.next().unwrap())
} else { } else {
None None
} }
@ -853,30 +680,28 @@ impl<'i, 't> Parser<'i, 't> {
self.accept(class).is_some() self.accept(class).is_some()
} }
fn peek(&mut self, class: TokenClass) -> bool {
self.tokens.peek().unwrap().class == class
}
*/
/*
fn expect(&mut self, class: TokenClass) { fn expect(&mut self, class: TokenClass) {
if !self.accepted(class) { if !self.accepted(class) {
panic!("we fucked"); panic!("we fucked");
} }
} }
*/
fn peek(&mut self, class: TokenClass) -> bool { /*
self.tokens.peek().unwrap().class == class
}
fn accept_eol(&mut self) -> bool {
if self.accepted(Comment) {
if !self.peek(Eof) { self.expect(Eol) };
true
} else {
self.accepted(Eol)
}
}
// fn accept(&mut self) -> Result<Token<'t>, Error<'t>> { // fn accept(&mut self) -> Result<Token<'t>, Error<'t>> {
// match self.peek( // match self.peek(
// } // }
fn recipe(&mut self, name: &'t str) -> Result<Recipe<'t>, Error<'t>> { fn recipe(&mut self, name: &'a str) -> Result<Recipe<'a>, Error<'a>> {
let mut arguments = vec![]; let mut arguments = vec![];
loop { loop {
if let Some(name_token) = self.accept(Name) { if let Some(name_token) = self.accept(Name) {
@ -896,8 +721,9 @@ impl<'i, 't> Parser<'i, 't> {
loop { loop {
if let Some(name_token) = self.accept(Name) { if let Some(name_token) = self.accept(Name) {
if dependencies.contains(&name_token.lexeme) { if dependencies.contains(&name_token.lexeme) {
return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{ panic!("duplicate dependency");
name: name_token.lexeme})); // return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{
// name: name_token.lexeme}));
} }
dependencies.push(name_token.lexeme); dependencies.push(name_token.lexeme);
} else { } else {
@ -913,23 +739,51 @@ impl<'i, 't> Parser<'i, 't> {
// Ok(Recipe{ // Ok(Recipe{
// }) // })
} }
*/
fn file(mut self) -> Result<Justfile<'t>, Error<'t>> { fn error(self, token: &Token<'a>, kind: ErrorKind<'a>) -> Error<'a> {
let mut recipes = BTreeMap::new(); token.error(self.text, kind)
}
fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
let recipes = BTreeMap::new();
loop {
match self.tokens.next() {
Some(token) => match token.class {
Eof => break,
Eol => continue,
_ => return Err(self.error(&token, ErrorKind::InternalError {
message: format!("unhandled token class: {:?}", token.class)
})),
},
None => return Err(Error {
text: self.text,
index: 0,
line: 0,
column: 0,
kind: ErrorKind::InternalError {
message: "unexpected end of token stream".to_string()
}
}),
}
}
/*
loop { loop {
if self.accepted(Eof) { break; } if self.accepted(Eof) { break; }
if self.accept_eol() { continue; } if self.accept_eol() { continue; }
match self.tokens.next() { match self.tokens.next() {
Some(&Token{class: Name, line, lexeme: name, ..}) => { Some(Token{class: Name, lexeme: name, ..}) => {
if self.accepted(Equals) { if self.accepted(Equals) {
panic!("Variable assignment not yet implemented"); panic!("Variable assignment not yet implemented");
} else { } else {
if recipes.contains_key(name) { if recipes.contains_key(name) {
return Err(error(self.text, line, ErrorKind::DuplicateDependency{ // return Err(error(self.text, line, ErrorKind::DuplicateDependency{
name: name, // name: name,
})); // }));
panic!("duplicate dep");
} }
let recipe = try!(self.recipe(name)); let recipe = try!(self.recipe(name));
recipes.insert(name, recipe); recipes.insert(name, recipe);
@ -938,154 +792,14 @@ impl<'i, 't> Parser<'i, 't> {
_ => panic!("got something else") _ => panic!("got something else")
}; };
} }
*/
// assert that token.next() == None if let Some(ref token) = self.tokens.next() {
return Err(self.error(token, ErrorKind::InternalError{
message: format!("unexpected token remaining after parsing completed: {:?}", token.class)
}))
}
Ok(Justfile{recipes: recipes}) Ok(Justfile{recipes: recipes})
} }
} }
// impl<'a, I> Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
// fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
// Ok()
// }
// }
pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
let tokens = try!(tokenize(text));
// let parser = Parser{tokens: tokens, index: 0};
// try!(parser.file());
let parser = Parser{text: text, tokens: &mut tokens.iter().peekable()};
try!(parser.file());
let shebang_re = re(r"^\s*#!(.*)$" );
let comment_re = re(r"^\s*#([^!].*)?$" );
let command_re = re(r"^(\s+).*$" );
let blank_re = re(r"^\s*$" );
let label_re = re(r"^([^#]*):(.*)$" );
let name_re = re(r"^[a-z](-[a-z]|[a-z])*$");
let whitespace_re = re(r"\s+" );
let mut recipes: BTreeMap<&'a str, Recipe<'a>> = BTreeMap::new();
let mut current_recipe: Option<Recipe> = None;
for (i, line) in text.lines().enumerate() {
if blank_re.is_match(line) {
continue;
}
if let Some(mut recipe) = current_recipe {
match command_re.captures(line) {
Some(captures) => {
let leading_whitespace = captures.at(1).unwrap();
if tab_after_space(leading_whitespace) {
return Err(error(text, i, ErrorKind::TabAfterSpace{
whitespace: leading_whitespace,
}));
} else if recipe.leading_whitespace == "" {
if mixed(leading_whitespace) {
return Err(error(text, i, ErrorKind::MixedLeadingWhitespace{
whitespace: leading_whitespace
}));
}
recipe.leading_whitespace = leading_whitespace;
} else if !line.starts_with(recipe.leading_whitespace) {
return Err(error(text, i, ErrorKind::InconsistentLeadingWhitespace{
expected: recipe.leading_whitespace,
found: leading_whitespace,
}));
}
recipe.lines.push(line.split_at(recipe.leading_whitespace.len()).1);
current_recipe = Some(recipe);
continue;
},
None => {
recipes.insert(recipe.name, recipe);
current_recipe = None;
},
}
}
if comment_re.is_match(line) {
// ignore
} else if shebang_re.is_match(line) {
return Err(error(text, i, ErrorKind::OuterShebang));
} else if let Some(captures) = label_re.captures(line) {
let name = captures.at(1).unwrap();
if !name_re.is_match(name) {
return Err(error(text, i, ErrorKind::BadRecipeName {
name: name,
}));
}
if let Some(recipe) = recipes.get(name) {
return Err(error(text, i, ErrorKind::DuplicateRecipe {
first: recipe.line_number,
name: name,
}));
}
let rest = captures.at(2).unwrap().trim();
let mut dependencies = vec![];
for part in whitespace_re.split(rest) {
if name_re.is_match(part) {
if dependencies.contains(&part) {
return Err(error(text, i, ErrorKind::DuplicateDependency{
name: part,
}));
}
dependencies.push(part);
} else {
return Err(error(text, i, ErrorKind::UnparsableDependencies));
}
}
current_recipe = Some(Recipe{
line_number: i,
label: line,
name: name,
leading_whitespace: "",
lines: vec![],
// fragments: vec![],
// variables: BTreeSet::new(),
// arguments: vec![],
dependencies: dependencies,
shebang: false,
});
} else {
return Err(error(text, i, ErrorKind::Unparsable));
}
}
if let Some(recipe) = current_recipe {
recipes.insert(recipe.name, recipe);
}
let leading_whitespace_re = re(r"^\s+");
for recipe in recipes.values_mut() {
for (i, line) in recipe.lines.iter().enumerate() {
let line_number = recipe.line_number + 1 + i;
if shebang_re.is_match(line) {
if i == 0 {
recipe.shebang = true;
} else {
return Err(error(text, line_number, ErrorKind::NonLeadingShebang{recipe: recipe.name}));
}
}
if !recipe.shebang && leading_whitespace_re.is_match(line) {
return Err(error(text, line_number, ErrorKind::ExtraLeadingWhitespace));
}
}
}
let mut resolved = HashSet::new();
let mut seen = HashSet::new();
let mut stack = vec![];
for (_, ref recipe) in &recipes {
try!(resolve(text, &recipes, &mut resolved, &mut seen, &mut stack, &recipe));
}
Ok(Justfile{recipes: recipes})
}

View File

@ -37,13 +37,6 @@ fn check_recipe(
assert_eq!(recipe.dependencies.iter().cloned().collect::<Vec<_>>(), dependencies); assert_eq!(recipe.dependencies.iter().cloned().collect::<Vec<_>>(), dependencies);
} }
fn expect_success(text: &str) -> Justfile {
match super::parse(text) {
Ok(justfile) => justfile,
Err(error) => panic!("Expected successful parse but got error {}", error),
}
}
#[test] #[test]
fn circular_dependency() { fn circular_dependency() {
expect_error("a: b\nb: a", 1, ErrorKind::CircularDependency{circle: vec!["a", "b", "a"]}); expect_error("a: b\nb: a", 1, ErrorKind::CircularDependency{circle: vec!["a", "b", "a"]});
@ -213,6 +206,8 @@ a:
*/ */
use super::{Token, Error, ErrorKind, Justfile};
fn tokenize_success(text: &str, expected_summary: &str) { fn tokenize_success(text: &str, expected_summary: &str) {
let tokens = super::tokenize(text).unwrap(); let tokens = super::tokenize(text).unwrap();
let roundtrip = tokens.iter().map(|t| { let roundtrip = tokens.iter().map(|t| {
@ -225,7 +220,20 @@ fn tokenize_success(text: &str, expected_summary: &str) {
assert_eq!(token_summary(&tokens), expected_summary); assert_eq!(token_summary(&tokens), expected_summary);
} }
fn token_summary(tokens: &[super::Token]) -> String { fn tokenize_error(text: &str, expected: Error) {
if let Err(error) = super::tokenize(text) {
assert_eq!(error.text, expected.text);
assert_eq!(error.index, expected.index);
assert_eq!(error.line, expected.line);
assert_eq!(error.column, expected.column);
assert_eq!(error.kind, expected.kind);
assert_eq!(error, expected);
} else {
panic!("tokenize() succeeded but expected: {}\n{}", expected, text);
}
}
fn token_summary(tokens: &[Token]) -> String {
tokens.iter().map(|t| { tokens.iter().map(|t| {
match t.class { match t.class {
super::TokenClass::Line{..} => "*", super::TokenClass::Line{..} => "*",
@ -241,6 +249,13 @@ fn token_summary(tokens: &[super::Token]) -> String {
}).collect::<Vec<_>>().join("") }).collect::<Vec<_>>().join("")
} }
fn parse_success(text: &str) -> Justfile {
match super::parse(text) {
Ok(justfile) => justfile,
Err(error) => panic!("Expected successful parse but got error {}", error),
}
}
#[test] #[test]
fn tokenize() { fn tokenize() {
let text = "bob let text = "bob
@ -263,4 +278,69 @@ bob:
"; ";
tokenize_success(text, "$N:$>*$*$$*$$*$$<N:$>*$."); tokenize_success(text, "$N:$>*$*$$*$$*$$<N:$>*$.");
tokenize_success("a:=#", "N:=#.")
}
// Checks that tokenize() reports InconsistentLeadingWhitespace when a line in
// a recipe body does not start with the indentation of the first body line.
#[test]
fn inconsistent_leading_whitespace() {
// Case 1: body indented one way, then a line indented with a tab.
// NOTE(review): `index: 9` and `expected: " "` imply that the "0" and "1"
// lines each begin with a single space -- confirm the literal still has them.
let text = "a:
0
1
\t2
";
tokenize_error(text, Error {
text: text,
index: 9,
line: 3,
column: 0,
kind: ErrorKind::InconsistentLeadingWhitespace{expected: " ", found: "\t"},
});
// Case 2: body indented with two tabs, then a line indented with a tab
// followed by a space, which does not start with the two-tab prefix.
let text = "a:
\t\t0
\t\t 1
\t 2
";
tokenize_error(text, Error {
text: text,
index: 12,
line: 3,
column: 0,
kind: ErrorKind::InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "},
});
}
// A "#!" at the very start of the text must be rejected by tokenize() with
// OuterShebang, reported at index 0, line 0, column 0.
#[test]
fn outer_shebang() {
    let text = "#!/usr/bin/env bash";
    let expected = Error {
        kind: ErrorKind::OuterShebang,
        text: text,
        index: 0,
        line: 0,
        column: 0,
    };
    tokenize_error(text, expected);
}
// A character that starts no known token must make tokenize() fail with
// UnknownStartOfToken, reported at index 0, line 0, column 0.
#[test]
fn unknown_start_of_token() {
    let text = "~";
    let expected = Error {
        kind: ErrorKind::UnknownStartOfToken,
        text: text,
        index: 0,
        line: 0,
        column: 0,
    };
    tokenize_error(text, expected);
}
#[test]
fn parse() {
parse_success("
# hello
");
} }