From 0a168032476bf378075bfb50132556d2e794e994 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Fri, 28 Oct 2016 00:06:36 -0700 Subject: [PATCH] Clippy fixes, bump version 0.2.3, string escapes --- Cargo.lock | 20 +++---- Cargo.toml | 2 +- notes | 27 ++-------- src/app.rs | 2 +- src/lib.rs | 145 ++++++++++++++++++++++++++++++++++++--------------- src/tests.rs | 42 +++++++++++++++ 6 files changed, 159 insertions(+), 79 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5098baa..c9c645c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,10 +1,10 @@ [root] name = "j" -version = "0.2.2" +version = "0.2.3" dependencies = [ - "clap 2.15.0 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.16.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -28,7 +28,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "clap" -version = "2.15.0" +version = "2.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -78,19 +78,19 @@ dependencies = [ [[package]] name = "regex" -version = "0.1.77" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "regex-syntax" -version = "0.3.5" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -167,14 +167,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" "checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" "checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" -"checksum clap 2.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c3ad95014a5d1926493801463817b2e7e3ee64e051361a560f805c5320cd17b1" +"checksum clap 2.16.2 (registry+https://github.com/rust-lang/crates.io-index)" = "08aac7b078ec0a58e1d4b43cfb11d47001f8eb7c6f6f2bda4f5eed43c82491f1" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f" "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" -"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" -"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" +"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" +"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" "checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e" "checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6" "checksum term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f7f5f3f71b0040cecc71af239414c23fd3c73570f5ff54cf50e03cef637f2a0" diff --git a/Cargo.toml b/Cargo.toml index d3c2750..7582428 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "j" -version = "0.2.2" +version = "0.2.3" authors = ["Casey Rodarmor "] license = "WTFPL/MIT/Apache-2.0" description = "a command runner" diff --git a/notes b/notes index 1c69bb7..94a3bc3 100644 --- a/notes +++ b/notes @@ -1,35 +1,13 @@ notes ----- -- implement string parsing - -\n \r \t \\ \" - let mut evaluated = String::new(); - let mut escape = false; - for c in contents.chars() { - if escape { - match c { - 'n' => evaluated.push('\n'), - 'r' => evaluated.push('\r'), - 't' => evaluated.push('\t'), - '\\' => evaluated.push('\\'), - '"' => evaluated.push('"'), - other => panic!("bad escape sequence: {}", other), - } - } else if c == '\\' { - escape = true; - } else { - evaluated.push(c); - } - } - if escape { - } - evaluated +- get weird of that weird extra printing - integration testing . run app with command line options and test output . exercise all features and all command line options . test that first recipe runs by default + . test that a few error messages are correct - underline problem token in error messages @@ -53,6 +31,7 @@ notes - before release: - rewrite grammar.txt +- make it clear it's beta - change name back to 'just', suggest j as alias - change description to "a polyglot command runner"? - update readme diff --git a/src/app.rs b/src/app.rs index fc79ab7..5d85fb9 100644 --- a/src/app.rs +++ b/src/app.rs @@ -21,7 +21,7 @@ macro_rules! die { pub fn app() { let matches = App::new("j") - .version("0.2.2") + .version("0.2.3") .author("Casey R. ") .about("Just a command runner - https://github.com/casey/j") .arg(Arg::with_name("list") diff --git a/src/lib.rs b/src/lib.rs index db7f825..ce58a75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ macro_rules! warn { let _ = writeln!(&mut std::io::stderr(), $($arg)*); }}; } + macro_rules! die { ($($arg:tt)*) => {{ extern crate std; @@ -72,7 +73,7 @@ enum Fragment<'a> { #[derive(PartialEq, Debug)] enum Expression<'a> { Variable{name: &'a str, token: Token<'a>}, - String{contents: &'a str}, + String{raw: &'a str, cooked: String}, Concatination{lhs: Box>, rhs: Box>}, } @@ -93,9 +94,8 @@ impl<'a> Iterator for Variables<'a> { fn next(&mut self) -> Option<&'a Token<'a>> { match self.stack.pop() { - None => None, + None | Some(&Expression::String{..}) => None, Some(&Expression::Variable{ref token,..}) => Some(token), - Some(&Expression::String{..}) => None, Some(&Expression::Concatination{ref lhs, ref rhs}) => { self.stack.push(lhs); self.stack.push(rhs); @@ -109,7 +109,7 @@ impl<'a> Display for Expression<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { match *self { Expression::Variable {name, .. } => try!(write!(f, "{}", name)), - Expression::String {contents } => try!(write!(f, "\"{}\"", contents)), + Expression::String {raw, .. } => try!(write!(f, "\"{}\"", raw)), Expression::Concatination{ref lhs, ref rhs} => try!(write!(f, "{} + {}", lhs, rhs)), } Ok(()) @@ -155,7 +155,7 @@ impl<'a> Recipe<'a> { text += "\n" } for line in &self.evaluated_lines[1..] { - text += &line; + text += line; text += "\n"; } try!( @@ -238,10 +238,10 @@ impl<'a> Display for Recipe<'a> { if j == 0 { try!(write!(f, " ")); } - match piece { - &Fragment::Text{ref text} => try!(write!(f, "{}", text.lexeme)), - &Fragment::Expression{ref expression, value: None} => try!(write!(f, "{}{} # ? {}", "{{", expression, "}}")), - &Fragment::Expression{ref expression, value: Some(ref string)} => try!(write!(f, "{}{} # \"{}\"{}", "{{", expression, string, "}}")), + match *piece { + Fragment::Text{ref text} => try!(write!(f, "{}", text.lexeme)), + Fragment::Expression{ref expression, value: None} => try!(write!(f, "{}{} # ? {}", "{{", expression, "}}")), + Fragment::Expression{ref expression, value: Some(ref string)} => try!(write!(f, "{}{} # \"{}\"{}", "{{", expression, string, "}}")), } } if i + 1 < self.lines.len() { @@ -325,12 +325,12 @@ fn evaluate<'a>( } for recipe in recipes.values_mut() { - for mut fragments in recipe.lines.iter_mut() { + for fragments in &mut recipe.lines { let mut line = String::new(); for mut fragment in fragments.iter_mut() { - match fragment { - &mut Fragment::Text{ref text} => line += text.lexeme, - &mut Fragment::Expression{ref expression, ref mut value} => { + match *fragment { + Fragment::Text{ref text} => line += text.lexeme, + Fragment::Expression{ref expression, ref mut value} => { let evaluated = &try!(evaluator.evaluate_expression(&expression)); *value = Some(evaluated.clone()); line += evaluated; @@ -392,8 +392,8 @@ impl<'a, 'b> Evaluator<'a, 'b> { self.evaluated.get(name).unwrap().clone() } } - Expression::String{contents} => { - contents.to_string() + Expression::String{ref cooked, ..} => { + cooked.clone() } Expression::Concatination{ref lhs, ref rhs} => { try!(self.evaluate_expression(lhs)) @@ -416,23 +416,25 @@ struct Error<'a> { #[derive(Debug, PartialEq)] enum ErrorKind<'a> { + ArgumentShadowsVariable{argument: &'a str}, BadName{name: &'a str}, CircularRecipeDependency{recipe: &'a str, circle: Vec<&'a str>}, CircularVariableDependency{variable: &'a str, circle: Vec<&'a str>}, - DuplicateDependency{recipe: &'a str, dependency: &'a str}, DuplicateArgument{recipe: &'a str, argument: &'a str}, + DuplicateDependency{recipe: &'a str, dependency: &'a str}, DuplicateRecipe{recipe: &'a str, first: usize}, DuplicateVariable{variable: &'a str}, - ArgumentShadowsVariable{argument: &'a str}, - MixedLeadingWhitespace{whitespace: &'a str}, ExtraLeadingWhitespace, InconsistentLeadingWhitespace{expected: &'a str, found: &'a str}, - OuterShebang, - UnknownDependency{recipe: &'a str, unknown: &'a str}, - UnknownVariable{variable: &'a str}, - UnknownStartOfToken, - UnexpectedToken{expected: Vec, found: TokenKind}, InternalError{message: String}, + InvalidEscapeSequence{character: char}, + MixedLeadingWhitespace{whitespace: &'a str}, + OuterShebang, + UnexpectedToken{expected: Vec, found: TokenKind}, + UnknownDependency{recipe: &'a str, unknown: &'a str}, + UnknownStartOfToken, + UnknownVariable{variable: &'a str}, + UnterminatedString, } fn show_whitespace(text: &str) -> String { @@ -485,6 +487,9 @@ impl<'a> Display for Error<'a> { try!(write!(f, "assignment to {} has circular dependency: {}", variable, circle.join(" -> "))); return Ok(()); } + ErrorKind::InvalidEscapeSequence{character} => { + try!(writeln!(f, "\\{}", character.escape_default().collect::())); + } ErrorKind::DuplicateArgument{recipe, argument} => { try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument)); } @@ -532,6 +537,9 @@ impl<'a> Display for Error<'a> { ErrorKind::UnknownStartOfToken => { try!(writeln!(f, "unknown start of token:")); } + ErrorKind::UnterminatedString => { + try!(writeln!(f, "unterminated string")); + } ErrorKind::InternalError{ref message} => { try!(writeln!(f, "internal error, this may indicate a bug in j: {}\n consider filing an issue: https://github.com/casey/j/issues/new", message)); } @@ -760,7 +768,7 @@ fn token(pattern: &str) -> Regex { re(&s) } -fn tokenize<'a>(text: &'a str) -> Result, Error> { +fn tokenize(text: &str) -> Result, Error> { lazy_static! { static ref EOF: Regex = token(r"(?-m)$" ); static ref NAME: Regex = token(r"([a-zA-Z0-9_-]+)" ); @@ -768,7 +776,7 @@ fn tokenize<'a>(text: &'a str) -> Result, Error> { static ref EQUALS: Regex = token(r"=" ); static ref PLUS: Regex = token(r"[+]" ); static ref COMMENT: Regex = token(r"#([^!].*)?$" ); - static ref STRING: Regex = token("\"[^\"]*\"" ); + static ref STRING: Regex = token("\"" ); static ref EOL: Regex = token(r"\n|\r\n" ); static ref INTERPOLATION_END: Regex = token(r"[}][}]" ); static ref INTERPOLATION_START_TOKEN: Regex = token(r"[{][{]" ); @@ -864,18 +872,16 @@ fn tokenize<'a>(text: &'a str) -> Result, Error> { } // insert a dedent if we're indented and we hit the end of the file - if &State::Start != state.last().unwrap() { - if EOF.is_match(rest) { - tokens.push(Token { - index: index, - line: line, - column: column, - text: text, - prefix: "", - lexeme: "", - kind: Dedent, - }); - } + if &State::Start != state.last().unwrap() && EOF.is_match(rest) { + tokens.push(Token { + index: index, + line: line, + column: column, + text: text, + prefix: "", + lexeme: "", + kind: Dedent, + }); } let (prefix, lexeme, kind) = @@ -888,7 +894,7 @@ fn tokenize<'a>(text: &'a str) -> Result, Error> { (&line[0..indent.len()], "", Line) } else if let Some(captures) = EOF.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Eof) - } else if let &State::Text = state.last().unwrap() { + } else if let State::Text = *state.last().unwrap() { if let Some(captures) = INTERPOLATION_START.captures(rest) { state.push(State::Interpolation); ("", captures.at(0).unwrap(), InterpolationStart) @@ -927,7 +933,34 @@ fn tokenize<'a>(text: &'a str) -> Result, Error> { } else if let Some(captures) = COMMENT.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Comment) } else if let Some(captures) = STRING.captures(rest) { - (captures.at(1).unwrap(), captures.at(2).unwrap(), StringToken) + let prefix = captures.at(1).unwrap(); + let contents = &rest[prefix.len()+1..]; + if contents.is_empty() { + return error!(ErrorKind::UnterminatedString); + } + // die on \n or \r + // stop on unescaped " + let mut len = 0; + let mut escape = false; + for c in contents.chars() { + if c == '\n' || c == '\r' { + return error!(ErrorKind::UnterminatedString); + } else if !escape && c == '"' { + break; + } else if !escape && c == '\\' { + escape = true; + } else if escape { + escape = false; + } + len += c.len_utf8(); + } + let start = prefix.len(); + let content_end = start + len + 1; + if escape || content_end >= rest.len() { + return error!(ErrorKind::UnterminatedString); + } + println!("{} {} {:?}", start, content_end, contents.chars().collect::>()); + (prefix, &rest[start..content_end + 1], StringToken) } else if rest.starts_with("#!") { return error!(ErrorKind::OuterShebang) } else { @@ -1105,7 +1138,7 @@ impl<'a> Parser<'a> { if token.lexeme.starts_with("#!") { shebang = true; } - } else if !shebang && token.lexeme.starts_with(" ") || token.lexeme.starts_with("\t") { + } else if !shebang && token.lexeme.starts_with(' ') || token.lexeme.starts_with('\t') { return Err(token.error(ErrorKind::ExtraLeadingWhitespace)); } } @@ -1143,8 +1176,34 @@ impl<'a> Parser<'a> { let first = self.tokens.next().unwrap(); let lhs = match first.kind { Name => Expression::Variable{name: first.lexeme, token: first}, - StringToken => Expression::String{contents: &first.lexeme[1..first.lexeme.len() - 1]}, - _ => return Err(self.unexpected_token(&first, &[Name, StringToken])), + StringToken => { + let raw = &first.lexeme[1..first.lexeme.len() - 1]; + let mut cooked = String::new(); + let mut escape = false; + for c in raw.chars() { + if escape { + match c { + 'n' => cooked.push('\n'), + 'r' => cooked.push('\r'), + 't' => cooked.push('\t'), + '\\' => cooked.push('\\'), + '"' => cooked.push('"'), + other => return Err(first.error(ErrorKind::InvalidEscapeSequence { + character: other, + })), + } + escape = false; + continue; + } + if c == '\\' { + escape = true; + continue; + } + cooked.push(c); + } + Expression::String{raw: raw, cooked: cooked} + } + _ => return Err(self.unexpected_token(&first, &[Name, StringToken])), }; if self.accepted(Plus) { @@ -1227,7 +1286,7 @@ impl<'a> Parser<'a> { for line in &recipe.lines { for piece in line { - if let &Fragment::Expression{ref expression, ..} = piece { + if let Fragment::Expression{ref expression, ..} = *piece { for variable in expression.variables() { let name = variable.lexeme; if !(assignments.contains_key(&name) || recipe.arguments.contains(&name)) { diff --git a/src/tests.rs b/src/tests.rs index 9d0a895..e2637a5 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -395,6 +395,48 @@ fn duplicate_variable() { }); } +#[test] +fn unterminated_string() { + let text = r#"a = ""#; + parse_error(text, Error { + text: text, + index: 3, + line: 0, + column: 3, + width: None, + kind: ErrorKind::UnterminatedString, + }); +} + +#[test] +fn unterminated_string_with_escapes() { + let text = r#"a = "\n\t\r\"\\"#; + parse_error(text, Error { + text: text, + index: 3, + line: 0, + column: 3, + width: None, + kind: ErrorKind::UnterminatedString, + }); +} + +#[test] +fn string_quote_escape() { + parse_summary( + r#"a = "hello\"""#, + r#"a = "hello\"" # "hello"""# + ); +} + +#[test] +fn string_escapes() { + parse_summary( + r#"a = "\n\t\r\"\\""#, + concat!(r#"a = "\n\t\r\"\\" "#, "# \"\n\t\r\"\\\"") + ); +} + #[test] fn self_recipe_dependency() { let text = "a: a";