Starting to rewrite the parser

parent 3b92e00ee7
commit 913bcba5f7
Cargo.lock (generated): 7 changed lines
@@ -3,6 +3,7 @@ name = "j"
version = "0.2.0"
dependencies = [
 "clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
 "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -49,6 +50,11 @@ dependencies = [
 "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "lazy_static"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "libc"
version = "0.2.16"
@@ -163,6 +169,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
"checksum clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5fa304b03c49ccbb005784fc26e985b5d2310b1d37f2c311ce90dbcd18ea5fde"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
Cargo.toml: 1 changed line

@@ -11,3 +11,4 @@ homepage = "https://github.com/casey/j"
regex = "^0.1.77"
clap = "^2.0.0"
tempdir = "^0.3.5"
lazy_static = "^0.2.1"
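The new lazy_static dependency is what later lets the tokenizer compile each token regex once instead of on every call; the lazy_static! block further down in this diff does exactly that. A minimal standalone sketch of the pattern (illustrative, not the commit's code):

#[macro_use]
extern crate lazy_static;
extern crate regex;

use regex::Regex;

lazy_static! {
  // Compiled on first access, then shared by every subsequent caller.
  static ref NAME: Regex = Regex::new(r"^[a-z]((_|-)?[a-z0-9])*").unwrap();
}

fn main() {
  assert!(NAME.is_match("build-all"));
}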
notes: 20 changed lines
@@ -5,11 +5,31 @@ notes
- parse lines into fragments and store in recipe
- positional error messages
- use clippy
- document everything, including internal stuff
- spam in rust irc chat when done
- use "kind" instead of class
- should I use // comments, since that's what's used in rust?
- vim and emacs syntax highlighting
- gah, maybe I should change it back to 'just'
  . makes more sense as a name
  . suggest j as alias
  . should see if people are using 'j'
  . doesn't conflict with autojump
- allow calling recipes in a justfile in a different
  directory:
  - ../foo # ../justfile:foo
  - xyz/foo # xyz/justfile:foo
- #![deny(missing_docs)]
// error on tab after space
// error on mixed leading whitespace
// error on inconsistent leading whitespace
// outer shebang
// strict recipe name checking, be lenient in tokenizing
// but strict in parsing
// duplicate recipe name error
// duplicate dependency error
// differentiate shebang and non-shebang recipe
// resolve each recipe after parsing

j:
- vector of substitutions
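For orientation, the grammar these notes and the new tokenizer target accepts justfiles shaped roughly like this (an illustrative example, not part of the commit):

# comments start with '#'
build: test
  cc main.c -o main

test:
  ./test --all

A recipe is a name matching [a-z]((_|-)?[a-z0-9])*, a colon, optional dependency names, and then indented body lines; a shebang "#!" is only legal as the first line of a recipe body.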
src/lib.rs: 744 changed lines
@@ -1,6 +1,8 @@
#[cfg(test)]
mod tests;

#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate tempdir;

@@ -48,7 +50,7 @@ pub struct Recipe<'a> {
  line_number: usize,
  label: &'a str,
  name: &'a str,
  leading_whitespace: &'a str,
  // leading_whitespace: &'a str,
  lines: Vec<&'a str>,
  // fragments: Vec<Vec<Fragment<'a>>>,
  // variables: BTreeSet<&'a str>,
@@ -181,6 +183,7 @@ impl<'a> Recipe<'a> {
  }
}

/*
fn resolve<'a>(
  text: &'a str,
  recipes: &BTreeMap<&str, Recipe<'a>>,
@@ -218,51 +221,58 @@ fn resolve<'a>(
  stack.pop();
  Ok(())
}
*/

#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub struct Error<'a> {
  text: &'a str,
  index: usize,
  line: usize,
  kind: ErrorKind<'a>
  column: usize,
  kind: ErrorKind<'a>,
}

#[derive(Debug, PartialEq)]
enum ErrorKind<'a> {
  BadRecipeName{name: &'a str},
  CircularDependency{circle: Vec<&'a str>},
  DuplicateDependency{name: &'a str},
  DuplicateArgument{recipe: &'a str, argument: &'a str},
  DuplicateRecipe{first: usize, name: &'a str},
  TabAfterSpace{whitespace: &'a str},
  MixedLeadingWhitespace{whitespace: &'a str},
  ExtraLeadingWhitespace,
  // BadRecipeName{name: &'a str},
  // CircularDependency{circle: Vec<&'a str>},
  // DuplicateDependency{name: &'a str},
  // DuplicateArgument{recipe: &'a str, argument: &'a str},
  // DuplicateRecipe{first: usize, name: &'a str},
  // TabAfterSpace{whitespace: &'a str},
  // MixedLeadingWhitespace{whitespace: &'a str},
  // ExtraLeadingWhitespace,
  InconsistentLeadingWhitespace{expected: &'a str, found: &'a str},
  OuterShebang,
  NonLeadingShebang{recipe: &'a str},
  UnknownDependency{name: &'a str, unknown: &'a str},
  Unparsable,
  UnparsableDependencies,
  // NonLeadingShebang{recipe: &'a str},
  // UnknownDependency{name: &'a str, unknown: &'a str},
  // Unparsable,
  // UnparsableDependencies,
  UnknownStartOfToken,
  InternalError{message: String},
}

fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>)
  -> Error<'a>
{
  Error {
    text: text,
    line: line,
    kind: kind,
  }
}
// fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>)
//   -> Error<'a>
// {
//   Error {
//     text: text,
//     line: line,
//     kind: kind,
//   }
// }

fn show_whitespace(text: &str) -> String {
  text.chars().map(|c| match c { '\t' => 't', ' ' => 's', _ => c }).collect()
}

/*
fn mixed(text: &str) -> bool {
  !(text.chars().all(|c| c == ' ') || text.chars().all(|c| c == '\t'))
}
*/

/*
fn tab_after_space(text: &str) -> bool {
  let mut space = false;
  for c in text.chars() {
@@ -276,66 +286,70 @@ fn tab_after_space(text: &str) -> bool {
    }
  return false;
}
*/

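Since the hunk above elides the middle of tab_after_space, here is a runnable sketch of all three whitespace helpers, with the elided loop body reconstructed (an assumption, marked below), plus examples of what they accept and reject:

fn show_whitespace(text: &str) -> String {
  text.chars().map(|c| match c { '\t' => 't', ' ' => 's', _ => c }).collect()
}

fn mixed(text: &str) -> bool {
  !(text.chars().all(|c| c == ' ') || text.chars().all(|c| c == '\t'))
}

fn tab_after_space(text: &str) -> bool {
  let mut space = false;
  for c in text.chars() {
    // reconstructed body: remember any space, flag a tab that follows one
    if c == ' ' {
      space = true;
    } else if c == '\t' && space {
      return true;
    }
  }
  return false;
}

fn main() {
  assert_eq!(show_whitespace("\t "), "ts"); // tabs print as 't', spaces as 's'
  assert!(mixed(" \t"));                    // spaces and tabs together
  assert!(!mixed("\t\t"));                  // a single kind is fine
  assert!(tab_after_space(" \t"));          // tab after a space: the error case
  assert!(!tab_after_space("\t "));         // tab before the space is not
}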
impl<'a> Display for Error<'a> {
  fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
    try!(write!(f, "justfile:{}: ", self.line));

    match self.kind {
      ErrorKind::BadRecipeName{name} => {
        try!(writeln!(f, "recipe name does not match /[a-z](-[a-z]|[a-z])*/: {}", name));
      }
      ErrorKind::CircularDependency{ref circle} => {
        try!(write!(f, "circular dependency: {}", circle.join(" -> ")));
        return Ok(());
      }
      ErrorKind::DuplicateArgument{recipe, argument} => {
        try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument));
      }
      ErrorKind::DuplicateDependency{name} => {
        try!(writeln!(f, "duplicate dependency: {}", name));
      }
      ErrorKind::DuplicateRecipe{first, name} => {
        try!(write!(f, "duplicate recipe: {} appears on lines {} and {}",
          name, first, self.line));
        return Ok(());
      }
      ErrorKind::TabAfterSpace{whitespace} => {
        try!(writeln!(f, "found tab after space: {}", show_whitespace(whitespace)));
      }
      ErrorKind::MixedLeadingWhitespace{whitespace} => {
        try!(writeln!(f,
          "inconsistent leading whitespace: recipe started with {}:",
          show_whitespace(whitespace)
        ));
      }
      ErrorKind::ExtraLeadingWhitespace => {
        try!(writeln!(f, "line has extra leading whitespace"));
      }
      // ErrorKind::BadRecipeName{name} => {
      //   try!(writeln!(f, "recipe name does not match /[a-z](-[a-z]|[a-z])*/: {}", name));
      // }
      // ErrorKind::CircularDependency{ref circle} => {
      //   try!(write!(f, "circular dependency: {}", circle.join(" -> ")));
      //   return Ok(());
      // }
      // ErrorKind::DuplicateArgument{recipe, argument} => {
      //   try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument));
      // }
      // ErrorKind::DuplicateDependency{name} => {
      //   try!(writeln!(f, "duplicate dependency: {}", name));
      // }
      // ErrorKind::DuplicateRecipe{first, name} => {
      //   try!(write!(f, "duplicate recipe: {} appears on lines {} and {}",
      //     name, first, self.line));
      //   return Ok(());
      // }
      // ErrorKind::TabAfterSpace{whitespace} => {
      //   try!(writeln!(f, "found tab after space: {}", show_whitespace(whitespace)));
      // }
      // ErrorKind::MixedLeadingWhitespace{whitespace} => {
      //   try!(writeln!(f,
      //     "inconsistent leading whitespace: recipe started with {}:",
      //     show_whitespace(whitespace)
      //   ));
      // }
      // ErrorKind::ExtraLeadingWhitespace => {
      //   try!(writeln!(f, "line has extra leading whitespace"));
      // }
      ErrorKind::InconsistentLeadingWhitespace{expected, found} => {
        try!(writeln!(f,
          "inconsistent leading whitespace: recipe started with {} but found line with {}:",
          "inconsistent leading whitespace: recipe started with \"{}\" but found line with \"{}\":",
          show_whitespace(expected), show_whitespace(found)
        ));
      }
      ErrorKind::OuterShebang => {
        try!(writeln!(f, "a shebang \"#!\" is reserved syntax outside of recipes"))
      }
      ErrorKind::NonLeadingShebang{..} => {
        try!(writeln!(f, "a shebang \"#!\" may only appear on the first line of a recipe"))
      }
      ErrorKind::UnknownDependency{name, unknown} => {
        try!(writeln!(f, "recipe {} has unknown dependency {}", name, unknown));
      }
      ErrorKind::Unparsable => {
        try!(writeln!(f, "could not parse line:"));
      }
      ErrorKind::UnparsableDependencies => {
        try!(writeln!(f, "could not parse dependencies:"));
      }
      // ErrorKind::NonLeadingShebang{..} => {
      //   try!(writeln!(f, "a shebang \"#!\" may only appear on the first line of a recipe"))
      // }
      // ErrorKind::UnknownDependency{name, unknown} => {
      //   try!(writeln!(f, "recipe {} has unknown dependency {}", name, unknown));
      // }
      // ErrorKind::Unparsable => {
      //   try!(writeln!(f, "could not parse line:"));
      // }
      // ErrorKind::UnparsableDependencies => {
      //   try!(writeln!(f, "could not parse dependencies:"));
      // }
      ErrorKind::UnknownStartOfToken => {
        try!(writeln!(f, "unknown start of token:"));
      }
      ErrorKind::InternalError{ref message} => {
        try!(writeln!(f, "internal error, this may indicate a bug in j: {}\n consider filing an issue: https://github.com/casey/j/issues/new", message));
      }
    }

    match self.text.lines().nth(self.line) {
@@ -454,14 +468,26 @@ impl<'a> Display for RunError<'a> {
}

struct Token<'a> {
  // index: usize,
  index: usize,
  line: usize,
  // col: usize,
  column: usize,
  prefix: &'a str,
  lexeme: &'a str,
  class: TokenClass,
}

impl<'a> Token<'a> {
  fn error(&self, text: &'a str, kind: ErrorKind<'a>) -> Error<'a> {
    Error {
      text: text,
      index: self.index,
      line: self.line,
      column: self.column,
      kind: kind,
    }
  }
}

#[derive(Debug, PartialEq, Clone, Copy)]
enum TokenClass {
  Name,
@@ -486,86 +512,78 @@ fn token(pattern: &str) -> Regex {
}

fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
  let name_re = token(r"[a-z]((_|-)?[a-z0-9])*");
  let colon_re = token(r":" );
  let equals_re = token(r"=" );
  let comment_re = token(r"#([^!].*)?$" );
  //let shebang_re = token(r"#!" );
  let eol_re = token(r"\n|\r\n" );
  let eof_re = token(r"(?-m)$" );
  //let line_re = token(r"[^\n\r]" );

  //let split_re = re("(?m)$");
  //let body_re = re(r"^(?ms)(.*?$)\s*(^[^ \t\r\n]|(?-m:$))");
  // let dedent_re = re(r"^(?m)\s*(^[^\s]|(?-m:$))");

  let line_re = re(r"^(?m)[ \t]+[^ \t\n\r].*$");

  /*
  #[derive(PartialEq)]
  enum State<'a> {
    Normal, // starting state
    Colon, // we have seen a colon since the last eol
    Recipe, // we are on the line after a colon
    Body{indent: &'a str}, // we are in a recipe body
  lazy_static! {
    static ref EOF: Regex = token(r"(?-m)$" );
    static ref NAME: Regex = token(r"[a-z]((_|-)?[a-z0-9])*");
    static ref COLON: Regex = token(r":" );
    static ref EQUALS: Regex = token(r"=" );
    static ref COMMENT: Regex = token(r"#([^!].*)?$" );
    static ref EOL: Regex = token(r"\n|\r\n" );
    static ref LINE: Regex = re(r"^(?m)[ \t]+[^ \t\n\r].*$");
    static ref INDENT: Regex = re(r"^([ \t]*)[^ \t\n\r]" );
  }
  */

  // state is:
  // beginning of line or not
  // current indent

  fn indentation(text: &str) -> Option<&str> {
    // fix this so it isn't recompiled every time
    let indent_re = re(r"^([ \t]*)[^ \t\n\r]");
    indent_re.captures(text).map(|captures| captures.at(1).unwrap())
    INDENT.captures(text).map(|captures| captures.at(1).unwrap())
  }

  let mut tokens = vec![];
  let mut rest = text;
  // let mut index = 0;
  let mut index = 0;
  let mut line = 0;
  let mut col = 0;
  let mut column = 0;
  let mut indent: Option<&str> = None;
  // let mut line = 0;
  // let mut col = 0;
  // let mut state = State::Normal;
  // let mut line_start = true;
  loop {
    if col == 0 {
      if let Some(class) = match (indent, indentation(rest)) {
        // dedent
        (Some(_), Some("")) => {
          indent = None;
          Some(Dedent)

  macro_rules! error {
    ($kind:expr) => {{
      Err(Error {
        text: text,
        index: index,
        line: line,
        column: column,
        kind: $kind,
      })
    }};
  }

  loop {
    if column == 0 {
      if let Some(class) = match (indent, indentation(rest)) {
        // ignore: was no indentation and there still isn't
        (None, Some("")) => {
          None
        }
        // indent
        // ignore: current line is blank
        (_, None) => {
          None
        }
        // indent: was no indentation, now there is
        (None, Some(current @ _)) => {
          // check mixed leading whitespace
          indent = Some(current);
          Some(Indent)
        }
        (Some(previous), Some(current @ _)) => {
        // dedent: there was indentation and now there isn't
        (Some(_), Some("")) => {
          indent = None;
          Some(Dedent)
        }
        // was indentation and still is, check if the new indentation matches
        (Some(previous), Some(current)) => {
          if !current.starts_with(previous) {
            return Err(error(text, line,
              ErrorKind::InconsistentLeadingWhitespace{expected: previous, found: current}
            ));
            return error!(ErrorKind::InconsistentLeadingWhitespace{
              expected: previous,
              found: current
            });
          }
          None
          // check tabs after spaces
        }
        // ignore
        _ => {
          None
        }
      } {
        tokens.push(Token {
          // index: index,
          index: index,
          line: line,
          // col: col,
          column: column,
          prefix: "",
          lexeme: "",
          class: class,
@@ -574,159 +592,39 @@ fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
    }

    let (prefix, lexeme, class) =
      if let (0, Some(indent), Some(captures)) = (col, indent, line_re.captures(rest)) {
      if let (0, Some(indent), Some(captures)) = (column, indent, LINE.captures(rest)) {
        let line = captures.at(0).unwrap();
        if !line.starts_with(indent) {
          panic!("Line did not start with expected indentation");
          return error!(ErrorKind::InternalError{message: "unexpected indent".to_string()});
        }
        let (prefix, lexeme) = line.split_at(indent.len());
        (prefix, lexeme, Line)
      } else if let Some(captures) = name_re.captures(rest) {
      } else if let Some(captures) = NAME.captures(rest) {
        (captures.at(1).unwrap(), captures.at(2).unwrap(), Name)
      } else if let Some(captures) = eol_re.captures(rest) {
      } else if let Some(captures) = EOL.captures(rest) {
        (captures.at(1).unwrap(), captures.at(2).unwrap(), Eol)
      } else if let Some(captures) = eof_re.captures(rest) {
      } else if let Some(captures) = EOF.captures(rest) {
        (captures.at(1).unwrap(), captures.at(2).unwrap(), Eof)
      } else if let Some(captures) = colon_re.captures(rest) {
      } else if let Some(captures) = COLON.captures(rest) {
        (captures.at(1).unwrap(), captures.at(2).unwrap(), Colon)
      } else if let Some(captures) = equals_re.captures(rest) {
      } else if let Some(captures) = EQUALS.captures(rest) {
        (captures.at(1).unwrap(), captures.at(2).unwrap(), Equals)
      } else if let Some(captures) = comment_re.captures(rest) {
      } else if let Some(captures) = COMMENT.captures(rest) {
        (captures.at(1).unwrap(), captures.at(2).unwrap(), Comment)
      } else {
        return Err(if rest.starts_with("#!") {
          error(text, line, ErrorKind::OuterShebang)
        return if rest.starts_with("#!") {
          error!(ErrorKind::OuterShebang)
        } else {
          error(text, line, ErrorKind::UnknownStartOfToken)
        });
          error!(ErrorKind::UnknownStartOfToken)
        };
      };


    // let (captures, class) = if let (0, Some(captures)) = line_re.captures(rest) {

    /*
    */

    /*
    if state == State::Recipe {
      let captures = indent_re.captures(rest).unwrap();
      let indent = captures.at(1).unwrap();
      let text = captures.at(2).unwrap();
      if indent != "" && text != "" {
        tokens.push(Token {
          index: index,
          prefix: "",
          lexeme: "",
          class: TokenClass::Indent,
        });
        state = State::Body{indent: indent};
      } else {
        state = State::Normal;
      }
    }
    */
    /*
    State::Body{indent: _} => {
      if let Some(captures) = body_re.captures(rest) {
        let body_text = captures.at(1).unwrap();
        for mut line in split_re.split(body_text) {
          if let Some(captures) = line_re.captures(line) {
            let len = captures.at(0).unwrap().len();
            tokens.push(Token {
              index: index,
              prefix: captures.at(1).unwrap(),
              lexeme: captures.at(2).unwrap(),
              class: TokenClass::Eol,
            });
            line = &line[len..];
          }
          println!("{:?}", line);
        }

        panic!("matched body: {}", captures.at(1).unwrap());


        // split the body into lines
        // for each line in the body, push a line if nonblank, then an eol
        // push a dedent
      }
    },
    */
    // State::Normal | State::Colon | State::Body{..} => {
    /*
    let (captures, class) = if let Some(captures) = eol_re.captures(rest) {
      (captures, TokenClass::Eol)
    } else if let State::Body{indent} = state {
      if dedent_re.is_match(rest) {
        tokens.push(Token {
          index: index,
          prefix: "",
          lexeme: "",
          class: TokenClass::Dedent,
        });
        state = State::Normal;
        continue
      }

      if let Some(captures) = line_re.captures(rest) {
        (captures, TokenClass::Line)
      } else {
        panic!("Failed to match a line");
      }
    } else if let Some(captures) = anchor_re.captures(rest) {
      (captures, TokenClass::Anchor)
    } else if let Some(captures) = name_re.captures(rest) {
      (captures, TokenClass::Name)
    } else if let Some(captures) = colon_re.captures(rest) {
      (captures, TokenClass::Colon)
    } else if let Some(captures) = comment_re.captures(rest) {
      let text = captures.at(3).unwrap_or("");
      (captures, TokenClass::Comment{text: text})
    } else if let Some(captures) = eof_re.captures(rest) {
      (captures, TokenClass::Eof)
    } else {
      panic!("Did not match a token! Rest: {}", rest);
    };
    */

    // let (captures, class) = if let (true, Some(captures)) = (line_start,

    // let all = captures.at(0).unwrap();
    // let prefix = captures.at(1).unwrap();
    // let lexeme = captures.at(2).unwrap();
    // let len = all.len();
    // let eof = class == TokenClass::Eof;
    //assert!(eof || lexeme.len() > 0);
    //assert!(all.len() > 0);
    //assert!(prefix.len() + lexeme.len() == len);

    /*
    if class == TokenClass::Colon {
      state = State::Colon;
    } else if class == TokenClass::Eol && state == State::Colon {
      state = State::Recipe;
    }
    */


    /*
    if class == TokenClass::Eol {
      row += 1;
      col = 0;
    } else {
      col += len;
    }

    let eof = TokenClass::Eof {
    }
    */

    let len = prefix.len() + lexeme.len();

    tokens.push(Token {
      // index: index,
      index: index,
      line: line,
      // col: col,
      column: column,
      prefix: prefix,
      lexeme: lexeme,
      class: class,
@@ -735,115 +633,44 @@ fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
    match tokens.last().unwrap().class {
      Eol => {
        line += 1;
        col = 0;
        column = 0;
      },
      Eof => {
        break;
      },
      _ => {
        col += len;
        column += len;
      }
    }

    rest = &rest[len..];
    // index += len;
    index += len;
  }

  Ok(tokens)
}

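The heart of the new tokenizer is the column-zero indentation check above, which decides whether to emit an Indent or Dedent token before matching anything else. A self-contained sketch of just that decision table (simplified and not the commit's code; the real version also reports mixed whitespace and builds positioned errors):

#[derive(Debug, PartialEq)]
enum IndentEvent { Indent, Dedent, Inconsistent }

// Leading spaces/tabs of a non-blank line, None for blank lines,
// mirroring the diff's INDENT regex ^([ \t]*)[^ \t\n\r].
fn indentation(line: &str) -> Option<&str> {
  let end = line.find(|c: char| c != ' ' && c != '\t')?;
  Some(&line[..end])
}

// `indent` is Some(prefix) while inside a recipe body.
fn check(indent: &mut Option<&'static str>, line: &'static str) -> Option<IndentEvent> {
  match (*indent, indentation(line)) {
    (_, None) => None,           // blank line: ignore
    (None, Some("")) => None,    // still at column zero
    (None, Some(current)) => {   // indentation begins
      *indent = Some(current);
      Some(IndentEvent::Indent)
    }
    (Some(_), Some("")) => {     // indentation ends
      *indent = None;
      Some(IndentEvent::Dedent)
    }
    (Some(previous), Some(current)) => {
      // the new indentation must extend the old one
      if current.starts_with(previous) { None } else { Some(IndentEvent::Inconsistent) }
    }
  }
}

fn main() {
  let mut indent = None;
  assert_eq!(check(&mut indent, "  echo hi"), Some(IndentEvent::Indent));
  assert_eq!(check(&mut indent, "\techo hi"), Some(IndentEvent::Inconsistent));
  assert_eq!(check(&mut indent, "done:"), Some(IndentEvent::Dedent));
}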
/*
struct Parser<'a, I> {
  tokens: Vec<Token<'a>>,
  index: usize,
}
*/

//impl<'a> Parser<'a> {
  /*
  fn peek(&mut self) -> TokenClass {
    self.tokens[self.index].class
  }

  fn advance(&mut self) {
    self.index += 1;
  }

  fn accept_eol(&mut self) -> bool {
    if self.accept(TokenClass::Comment) {
      self.expect(TokenClass::Eol);
      true
    } else
  }
  */

  /*
  fn accept(&mut self, class: TokenClass) -> bool {
    if self.tokens[self.index].class == class {
      self.index += 1;
      true
    } else {
      false
    }
  }
  */

  /*
  fn peek(&mut self) -> Option<TokenClass> {
    self.tokens.get(self.index).map(|t| t.class)
  }

  fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
    let recipes = BTreeMap::new();

    loop {
      let ref current = self.tokens[self.index];
      self.index += 1;

      match current.class {
        TokenClass::Eof => break,
        TokenClass::Comment => continue,
        TokenClass::Eol => continue,
        TokenClass::Name => {
          match self.peek() {
            Some(TokenClass::Name) | Some(TokenClass::Colon) => {
              panic!("time to parse a recipe");
            }
            Some(TokenClass::Equals) => {
              panic!("time to parse an assignment");
            }
            Some(unexpected @ _) => {
              panic!("unexpected token");
            }
            None => {
              panic!("unexpected end of token stream");
            }
          }
        }
        unexpected @ _ => {
          panic!("unexpected token at top level");
        }
      }
    }

    Ok(Justfile{recipes: recipes})
  }
}
*/

// struct Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
//   tokens: std::iter::Peekable<I>,
// }

struct Parser<'i, 't: 'i> {
  text: &'t str,
  tokens: &'i mut std::iter::Peekable<std::slice::Iter<'i, Token<'t>>>
pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
  let tokens = try!(tokenize(text));
  let filtered: Vec<_> = tokens.into_iter().filter(|t| t.class != Comment).collect();
  let parser = Parser{
    text: text,
    tokens: filtered.into_iter().peekable()
  };
  let justfile = try!(parser.file());
  Ok(justfile)
}

impl<'i, 't> Parser<'i, 't> {
  fn accept(&mut self, class: TokenClass) -> Option<&Token<'t>> {
    if self.tokens.peek().unwrap().class == class {
      Some(self.tokens.next().unwrap())
struct Parser<'a> {
  text: &'a str,
  tokens: std::iter::Peekable<std::vec::IntoIter<Token<'a>>>
}

impl<'a> Parser<'a> {
  /*
  fn accept(&mut self, class: TokenClass) -> Option<Token<'a>> {
    if self.peek(class) {
      self.tokens.next()
    } else {
      None
    }
@@ -853,30 +680,28 @@ impl<'i, 't> Parser<'i, 't> {
    self.accept(class).is_some()
  }

  fn peek(&mut self, class: TokenClass) -> bool {
    self.tokens.peek().unwrap().class == class
  }
  */

  /*

  fn expect(&mut self, class: TokenClass) {
    if !self.accepted(class) {
      panic!("we fucked");
    }
  }
  */

  fn peek(&mut self, class: TokenClass) -> bool {
    self.tokens.peek().unwrap().class == class
  }
  /*

  fn accept_eol(&mut self) -> bool {
    if self.accepted(Comment) {
      if !self.peek(Eof) { self.expect(Eol) };
      true
    } else {
      self.accepted(Eol)
    }
  }

  // fn accept(&mut self) -> Result<Token<'t>, Error<'t>> {
  //   match self.peek(
  // }

  fn recipe(&mut self, name: &'t str) -> Result<Recipe<'t>, Error<'t>> {
  fn recipe(&mut self, name: &'a str) -> Result<Recipe<'a>, Error<'a>> {
    let mut arguments = vec![];
    loop {
      if let Some(name_token) = self.accept(Name) {
@@ -896,8 +721,9 @@ impl<'i, 't> Parser<'i, 't> {
    loop {
      if let Some(name_token) = self.accept(Name) {
        if dependencies.contains(&name_token.lexeme) {
          return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{
            name: name_token.lexeme}));
          panic!("duplicate dependency");
          // return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{
          //   name: name_token.lexeme}));
        }
        dependencies.push(name_token.lexeme);
      } else {
@@ -913,23 +739,51 @@ impl<'i, 't> Parser<'i, 't> {
    // Ok(Recipe{
    // })
  }
  */

  fn file(mut self) -> Result<Justfile<'t>, Error<'t>> {
    let mut recipes = BTreeMap::new();
  fn error(self, token: &Token<'a>, kind: ErrorKind<'a>) -> Error<'a> {
    token.error(self.text, kind)
  }

  fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
    let recipes = BTreeMap::new();

    loop {
      match self.tokens.next() {
        Some(token) => match token.class {
          Eof => break,
          Eol => continue,
          _ => return Err(self.error(&token, ErrorKind::InternalError {
            message: format!("unhandled token class: {:?}", token.class)
          })),
        },
        None => return Err(Error {
          text: self.text,
          index: 0,
          line: 0,
          column: 0,
          kind: ErrorKind::InternalError {
            message: "unexpected end of token stream".to_string()
          }
        }),
      }
    }

    /*
    loop {
      if self.accepted(Eof) { break; }
      if self.accept_eol() { continue; }

      match self.tokens.next() {
        Some(&Token{class: Name, line, lexeme: name, ..}) => {
        Some(Token{class: Name, lexeme: name, ..}) => {
          if self.accepted(Equals) {
            panic!("Variable assignment not yet implemented");
          } else {
            if recipes.contains_key(name) {
              return Err(error(self.text, line, ErrorKind::DuplicateDependency{
                name: name,
              }));
              // return Err(error(self.text, line, ErrorKind::DuplicateDependency{
              //   name: name,
              // }));
              panic!("duplicate dep");
            }
            let recipe = try!(self.recipe(name));
            recipes.insert(name, recipe);
@@ -938,154 +792,14 @@ impl<'i, 't> Parser<'i, 't> {
        _ => panic!("got something else")
      };
    }
    */

    // assert that token.next() == None
    if let Some(ref token) = self.tokens.next() {
      return Err(self.error(token, ErrorKind::InternalError{
        message: format!("unexpected token remaining after parsing completed: {:?}", token.class)
      }))
    }

    Ok(Justfile{recipes: recipes})
  }
}

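The accept/accepted/peek helpers above are the standard predictive-parsing primitives. Distilled to their shape (a sketch using the same Peekable-over-Vec layout as the new Parser, not the commit's code):

use std::iter::Peekable;
use std::vec::IntoIter;

#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenClass { Name, Colon, Eol, Eof }

#[derive(Debug)]
struct Token { class: TokenClass }

struct Parser { tokens: Peekable<IntoIter<Token>> }

impl Parser {
  // Consume the next token only if it has the expected class.
  fn accept(&mut self, class: TokenClass) -> Option<Token> {
    if self.tokens.peek().map(|t| t.class) == Some(class) {
      self.tokens.next()
    } else {
      None
    }
  }

  // Like accept(), but only reports whether anything was consumed.
  fn accepted(&mut self, class: TokenClass) -> bool {
    self.accept(class).is_some()
  }
}

fn main() {
  let tokens = vec![Token { class: TokenClass::Name }, Token { class: TokenClass::Colon }];
  let mut p = Parser { tokens: tokens.into_iter().peekable() };
  assert!(p.accept(TokenClass::Name).is_some()); // consumes the Name
  assert!(!p.accepted(TokenClass::Eol));         // Colon is next, nothing consumed
  assert!(p.accepted(TokenClass::Colon));
}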
// impl<'a, I> Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
//   fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
//     Ok()
//   }
// }

pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
  let tokens = try!(tokenize(text));
  // let parser = Parser{tokens: tokens, index: 0};
  // try!(parser.file());

  let parser = Parser{text: text, tokens: &mut tokens.iter().peekable()};
  try!(parser.file());

  let shebang_re = re(r"^\s*#!(.*)$" );
  let comment_re = re(r"^\s*#([^!].*)?$" );
  let command_re = re(r"^(\s+).*$" );
  let blank_re = re(r"^\s*$" );
  let label_re = re(r"^([^#]*):(.*)$" );
  let name_re = re(r"^[a-z](-[a-z]|[a-z])*$");
  let whitespace_re = re(r"\s+" );

  let mut recipes: BTreeMap<&'a str, Recipe<'a>> = BTreeMap::new();
  let mut current_recipe: Option<Recipe> = None;
  for (i, line) in text.lines().enumerate() {
    if blank_re.is_match(line) {
      continue;
    }

    if let Some(mut recipe) = current_recipe {
      match command_re.captures(line) {
        Some(captures) => {
          let leading_whitespace = captures.at(1).unwrap();
          if tab_after_space(leading_whitespace) {
            return Err(error(text, i, ErrorKind::TabAfterSpace{
              whitespace: leading_whitespace,
            }));
          } else if recipe.leading_whitespace == "" {
            if mixed(leading_whitespace) {
              return Err(error(text, i, ErrorKind::MixedLeadingWhitespace{
                whitespace: leading_whitespace
              }));
            }
            recipe.leading_whitespace = leading_whitespace;
          } else if !line.starts_with(recipe.leading_whitespace) {
            return Err(error(text, i, ErrorKind::InconsistentLeadingWhitespace{
              expected: recipe.leading_whitespace,
              found: leading_whitespace,
            }));
          }
          recipe.lines.push(line.split_at(recipe.leading_whitespace.len()).1);
          current_recipe = Some(recipe);
          continue;
        },
        None => {
          recipes.insert(recipe.name, recipe);
          current_recipe = None;
        },
      }
    }

    if comment_re.is_match(line) {
      // ignore
    } else if shebang_re.is_match(line) {
      return Err(error(text, i, ErrorKind::OuterShebang));
    } else if let Some(captures) = label_re.captures(line) {
      let name = captures.at(1).unwrap();
      if !name_re.is_match(name) {
        return Err(error(text, i, ErrorKind::BadRecipeName {
          name: name,
        }));
      }
      if let Some(recipe) = recipes.get(name) {
        return Err(error(text, i, ErrorKind::DuplicateRecipe {
          first: recipe.line_number,
          name: name,
        }));
      }

      let rest = captures.at(2).unwrap().trim();
      let mut dependencies = vec![];
      for part in whitespace_re.split(rest) {
        if name_re.is_match(part) {
          if dependencies.contains(&part) {
            return Err(error(text, i, ErrorKind::DuplicateDependency{
              name: part,
            }));
          }
          dependencies.push(part);
        } else {
          return Err(error(text, i, ErrorKind::UnparsableDependencies));
        }
      }

      current_recipe = Some(Recipe{
        line_number: i,
        label: line,
        name: name,
        leading_whitespace: "",
        lines: vec![],
        // fragments: vec![],
        // variables: BTreeSet::new(),
        // arguments: vec![],
        dependencies: dependencies,
        shebang: false,
      });
    } else {
      return Err(error(text, i, ErrorKind::Unparsable));
    }
  }

  if let Some(recipe) = current_recipe {
    recipes.insert(recipe.name, recipe);
  }

  let leading_whitespace_re = re(r"^\s+");

  for recipe in recipes.values_mut() {
    for (i, line) in recipe.lines.iter().enumerate() {
      let line_number = recipe.line_number + 1 + i;
      if shebang_re.is_match(line) {
        if i == 0 {
          recipe.shebang = true;
        } else {
          return Err(error(text, line_number, ErrorKind::NonLeadingShebang{recipe: recipe.name}));
        }
      }
      if !recipe.shebang && leading_whitespace_re.is_match(line) {
        return Err(error(text, line_number, ErrorKind::ExtraLeadingWhitespace));
      }
    }
  }

  let mut resolved = HashSet::new();
  let mut seen = HashSet::new();
  let mut stack = vec![];

  for (_, ref recipe) in &recipes {
    try!(resolve(text, &recipes, &mut resolved, &mut seen, &mut stack, &recipe));
  }

  Ok(Justfile{recipes: recipes})
}
src/tests.rs: 96 changed lines
@@ -37,13 +37,6 @@ fn check_recipe(
  assert_eq!(recipe.dependencies.iter().cloned().collect::<Vec<_>>(), dependencies);
}

fn expect_success(text: &str) -> Justfile {
  match super::parse(text) {
    Ok(justfile) => justfile,
    Err(error) => panic!("Expected successful parse but got error {}", error),
  }
}

#[test]
fn circular_dependency() {
  expect_error("a: b\nb: a", 1, ErrorKind::CircularDependency{circle: vec!["a", "b", "a"]});
@@ -213,6 +206,8 @@ a:

*/

use super::{Token, Error, ErrorKind, Justfile};

fn tokenize_success(text: &str, expected_summary: &str) {
  let tokens = super::tokenize(text).unwrap();
  let roundtrip = tokens.iter().map(|t| {
@@ -225,7 +220,20 @@ fn tokenize_success(text: &str, expected_summary: &str) {
  assert_eq!(token_summary(&tokens), expected_summary);
}

fn token_summary(tokens: &[super::Token]) -> String {
fn tokenize_error(text: &str, expected: Error) {
  if let Err(error) = super::tokenize(text) {
    assert_eq!(error.text, expected.text);
    assert_eq!(error.index, expected.index);
    assert_eq!(error.line, expected.line);
    assert_eq!(error.column, expected.column);
    assert_eq!(error.kind, expected.kind);
    assert_eq!(error, expected);
  } else {
    panic!("tokenize() succeeded but expected: {}\n{}", expected, text);
  }
}

fn token_summary(tokens: &[Token]) -> String {
  tokens.iter().map(|t| {
    match t.class {
      super::TokenClass::Line{..} => "*",
@@ -241,6 +249,13 @@ fn token_summary(tokens: &[super::Token]) -> String {
  }).collect::<Vec<_>>().join("")
}

fn parse_success(text: &str) -> Justfile {
  match super::parse(text) {
    Ok(justfile) => justfile,
    Err(error) => panic!("Expected successful parse but got error {}", error),
  }
}

#[test]
fn tokenize() {
  let text = "bob
@@ -263,4 +278,69 @@ bob:
";

  tokenize_success(text, "$N:$>*$*$$*$$*$$<N:$>*$.");

  tokenize_success("a:=#", "N:=#.")
}

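A key for reading the summary strings, inferred from the fixtures (the visible hunk only shows the Line arm of token_summary directly, so treat the rest as inference): N = Name, : = Colon, = = Equals, # = Comment, $ = Eol, > = Indent, < = Dedent, * = Line, . = Eof. For instance, "a:=#" tokenizing to "N:=#." reads as Name, Colon, Equals, Comment, Eof.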
#[test]
fn inconsistent_leading_whitespace() {
  let text = "a:
 0
 1
\t2
";
  tokenize_error(text, Error {
    text: text,
    index: 9,
    line: 3,
    column: 0,
    kind: ErrorKind::InconsistentLeadingWhitespace{expected: " ", found: "\t"},
  });

  let text = "a:
\t\t0
\t\t 1
\t 2
";
  tokenize_error(text, Error {
    text: text,
    index: 12,
    line: 3,
    column: 0,
    kind: ErrorKind::InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "},
  });
}

#[test]
fn outer_shebang() {
  let text = "#!/usr/bin/env bash";
  tokenize_error(text, Error {
    text: text,
    index: 0,
    line: 0,
    column: 0,
    kind: ErrorKind::OuterShebang
  });
}

#[test]
fn unknown_start_of_token() {
  let text = "~";
  tokenize_error(text, Error {
    text: text,
    index: 0,
    line: 0,
    column: 0,
    kind: ErrorKind::UnknownStartOfToken
  });
}

#[test]
fn parse() {
  parse_success("

# hello


");
}