Add nom_locate

This commit is contained in:
Greg Shuflin 2021-11-17 03:59:16 -08:00
parent b4b1a0cf63
commit 77030091bb
3 changed files with 88 additions and 59 deletions

18
Cargo.lock generated
View File

@ -110,6 +110,12 @@ dependencies = [
"constant_time_eq", "constant_time_eq",
] ]
[[package]]
name = "bytecount"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.3.2" version = "1.3.2"
@ -479,6 +485,17 @@ dependencies = [
"version_check 0.9.3", "version_check 0.9.3",
] ]
[[package]]
name = "nom_locate"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37794436ca3029a3089e0b95d42da1f0b565ad271e4d3bb4bad0c7bb70b10605"
dependencies = [
"bytecount",
"memchr",
"nom 7.1.0",
]
[[package]] [[package]]
name = "num" name = "num"
version = "0.1.42" version = "0.1.42"
@ -937,6 +954,7 @@ dependencies = [
"failure", "failure",
"itertools", "itertools",
"nom 7.1.0", "nom 7.1.0",
"nom_locate",
"peg", "peg",
"pretty_assertions", "pretty_assertions",
"radix_trie", "radix_trie",

View File

@ -17,6 +17,7 @@ assert_matches = "1.5"
#peg = "0.7.0" #peg = "0.7.0"
peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" } peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" }
nom = "7.1.0" nom = "7.1.0"
nom_locate = "4.0.0"
schala-repl = { path = "../schala-repl" } schala-repl = { path = "../schala-repl" }

View File

@ -9,9 +9,11 @@ use nom::{
sequence::{pair, tuple, preceded}, sequence::{pair, tuple, preceded},
IResult, Parser, IResult, Parser,
}; };
use std::rc::Rc; use nom_locate::{position, LocatedSpan};
use std::rc::Rc;
type ParseResult<'a, O> = IResult<&'a str, O, VerboseError<&'a str>>; type Span<'a> = LocatedSpan<&'a str>;
type ParseResult<'a, O> = IResult<Span<'a>, O, VerboseError<Span<'a>>>;
use crate::ast::*; use crate::ast::*;
@ -19,22 +21,22 @@ fn rc_string(s: &str) -> Rc<String> {
Rc::new(s.to_string()) Rc::new(s.to_string())
} }
fn tok<'a, O>(input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>) -> impl FnMut(&'a str) fn tok<'a, O>(input_parser: impl Parser<Span<'a>, O, VerboseError<Span<'a>>>) -> impl FnMut(Span<'a>)
-> IResult<&'a str, O, VerboseError<&'a str>> { -> IResult<Span<'a>, O, VerboseError<Span<'a>>> {
context("tok", context("tok",
map(tuple((ws0, input_parser)), |(_, output)| map(tuple((ws0, input_parser)), |(_, output)|
output)) output))
} }
fn kw<'a>(keyword_str: &'static str) -> impl FnMut(&'a str) -> ParseResult<()> { fn kw<'a>(keyword_str: &'static str) -> impl FnMut(Span<'a>) -> ParseResult<()> {
context("keyword", context("keyword",
tok(value((), tag(keyword_str)))) tok(value((), tag(keyword_str))))
} }
// whitespace does consume at least one piece of whitespace - use ws0 for maybe none // whitespace does consume at least one piece of whitespace - use ws0 for maybe none
fn whitespace(input: &str) -> ParseResult<()> { fn whitespace(input: Span) -> ParseResult<()> {
context("whitespace", context("whitespace",
alt(( alt((
block_comment, block_comment,
@ -43,18 +45,18 @@ fn whitespace(input: &str) -> ParseResult<()> {
)))(input) )))(input)
} }
fn ws0(input: &str) -> ParseResult<()> { fn ws0(input: Span) -> ParseResult<()> {
context("WS0", context("WS0",
value((), many0(whitespace)))(input) value((), many0(whitespace)))(input)
} }
fn line_comment(input: &str) -> ParseResult<()> { fn line_comment(input: Span) -> ParseResult<()> {
value((), value((),
tuple((tag("//"), not_line_ending)), tuple((tag("//"), not_line_ending)),
)(input) )(input)
} }
fn block_comment(input: &str) -> ParseResult<()> { fn block_comment(input: Span) -> ParseResult<()> {
context("Block-comment", context("Block-comment",
value((), value((),
tuple(( tuple((
@ -68,7 +70,7 @@ fn block_comment(input: &str) -> ParseResult<()> {
))))(input) ))))(input)
} }
fn statement_delimiter(input: &str) -> ParseResult<()> { fn statement_delimiter(input: Span) -> ParseResult<()> {
tok(alt(( tok(alt((
value((), line_ending), value((), line_ending),
value((), char(';')) value((), char(';'))
@ -76,7 +78,7 @@ fn statement_delimiter(input: &str) -> ParseResult<()> {
)(input) )(input)
} }
fn block(input: &str) -> ParseResult<Block> { fn block(input: Span) -> ParseResult<Block> {
context("block", context("block",
map( map(
tuple(( tuple((
@ -88,7 +90,7 @@ fn block(input: &str) -> ParseResult<Block> {
)), |(_, _, items, _, _)| items.into()))(input) )), |(_, _, items, _, _)| items.into()))(input)
} }
fn statement(input: &str) -> ParseResult<Statement> { fn statement(input: Span) -> ParseResult<Statement> {
context("Parsing-statement", context("Parsing-statement",
map(expression, |expr| Statement { map(expression, |expr| Statement {
id: Default::default(), id: Default::default(),
@ -97,17 +99,17 @@ fn statement(input: &str) -> ParseResult<Statement> {
}))(input) }))(input)
} }
fn expression(input: &str) -> ParseResult<Expression> { fn expression(input: Span) -> ParseResult<Expression> {
map(pair(expression_kind, opt(type_anno)), |(kind, maybe_anno)| { map(pair(expression_kind, opt(type_anno)), |(kind, maybe_anno)| {
Expression::new(Default::default(), kind) Expression::new(Default::default(), kind)
})(input) })(input)
} }
fn type_anno(input: &str) -> ParseResult<TypeIdentifier> { fn type_anno(input: Span) -> ParseResult<TypeIdentifier> {
preceded(kw(":"), type_identifier)(input) preceded(kw(":"), type_identifier)(input)
} }
fn type_identifier(input: &str) -> ParseResult<TypeIdentifier> { fn type_identifier(input: Span) -> ParseResult<TypeIdentifier> {
/* /*
alt(( alt((
tuple((kw("("), separated_list0(kw(","), type_identifier), kw(")"))), tuple((kw("("), separated_list0(kw(","), type_identifier), kw(")"))),
@ -117,16 +119,15 @@ fn type_identifier(input: &str) -> ParseResult<TypeIdentifier> {
unimplemented!() unimplemented!()
} }
/// Placeholder for the type-singleton-name parser.
///
/// # Panics
///
/// Always panics via `unimplemented!()`; no parsing is performed yet.
fn type_singleton_name(input: Span<'_>) -> ParseResult<'_, TypeSingletonName> {
    // Explicitly discard the argument until the real parser is written.
    let _ = input;
    unimplemented!()
}
pub fn expression_kind(input: &str) -> ParseResult<ExpressionKind> { pub fn expression_kind(input: Span) -> ParseResult<ExpressionKind> {
context("expression-kind", primary_expr)(input) context("expression-kind", primary_expr)(input)
} }
fn primary_expr(input: &str) -> ParseResult<ExpressionKind> { fn primary_expr(input: Span) -> ParseResult<ExpressionKind> {
context("primary-expr", context("primary-expr",
alt(( alt((
number_literal, number_literal,
@ -136,19 +137,19 @@ fn primary_expr(input: &str) -> ParseResult<ExpressionKind> {
} }
fn identifier_expr(input: &str) -> ParseResult<ExpressionKind> { fn identifier_expr(input: Span) -> ParseResult<ExpressionKind> {
context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input) context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input)
} }
/// Parses one or more identifiers separated by `::` into a `QualifiedName`.
///
/// Leading input accepted by `ws0` is skipped (via `tok`). Each component is
/// copied out of the located span into an `Rc<String>`; the name's `id` is
/// left at its default value.
fn qualified_identifier(input: Span<'_>) -> ParseResult<'_, QualifiedName> {
    // Only the matched text of each segment is kept, not its position.
    let component = map(identifier, |segment: Span<'_>| rc_string(segment.fragment()));
    let (rest, components) = tok(separated_list1(tag("::"), component))(input)?;
    Ok((rest, QualifiedName { id: Default::default(), components }))
}
fn identifier(input: &str) -> ParseResult<&str> { fn identifier(input: Span) -> ParseResult<Span> {
recognize( recognize(
tuple(( tuple((
alt((tag("_"), alpha1)), alt((tag("_"), alpha1)),
@ -156,7 +157,7 @@ fn identifier(input: &str) -> ParseResult<&str> {
)))(input) )))(input)
} }
fn bool_literal(input: &str) -> ParseResult<ExpressionKind> { fn bool_literal(input: Span) -> ParseResult<ExpressionKind> {
context("bool-literal", context("bool-literal",
alt(( alt((
map(kw("true"), |_| ExpressionKind::BoolLiteral(true)), map(kw("true"), |_| ExpressionKind::BoolLiteral(true)),
@ -164,48 +165,48 @@ fn bool_literal(input: &str) -> ParseResult<ExpressionKind> {
)))(input) )))(input)
} }
fn number_literal(input: &str) -> ParseResult<ExpressionKind> { fn number_literal(input: Span) -> ParseResult<ExpressionKind> {
map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input) map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input)
} }
fn dec_literal(input: &str) -> ParseResult<u64> { fn dec_literal(input: Span) -> ParseResult<u64> {
map(digits(digit_group_dec), |chars: Vec<char>| { map(digits(digit_group_dec), |chars: Vec<char>| {
let s: String = chars.into_iter().collect(); let s: String = chars.into_iter().collect();
s.parse().unwrap() s.parse().unwrap()
})(input) })(input)
} }
fn hex_literal(input: &str) -> ParseResult<u64> { fn hex_literal(input: Span) -> ParseResult<u64> {
map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec<char>| { map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec<char>| {
let s: String = chars.into_iter().collect(); let s: String = chars.into_iter().collect();
parse_hex(&s).unwrap() parse_hex(&s).unwrap()
})(input) })(input)
} }
fn bin_literal(input: &str) -> ParseResult<u64> { fn bin_literal(input: Span) -> ParseResult<u64> {
map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec<char>| { map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec<char>| {
let s: String = chars.into_iter().collect(); let s: String = chars.into_iter().collect();
parse_binary(&s).unwrap() parse_binary(&s).unwrap()
})(input) })(input)
} }
fn digits<'a, E: ParseError<&'a str>>( fn digits<'a, E: ParseError<Span<'a>>>(
digit_type: impl Parser<&'a str, Vec<char>, E>, digit_type: impl Parser<Span<'a>, Vec<char>, E>,
) -> impl FnMut(&'a str) -> IResult<&'a str, Vec<char>, E> { ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Vec<char>, E> {
map(separated_list1(many1(char('_')), digit_type), |items: Vec<Vec<char>>| { map(separated_list1(many1(char('_')), digit_type), |items: Vec<Vec<char>>| {
items.into_iter().flatten().collect() items.into_iter().flatten().collect()
}) })
} }
fn digit_group_dec(input: &str) -> ParseResult<Vec<char>> { fn digit_group_dec(input: Span) -> ParseResult<Vec<char>> {
many1(one_of("0123456789"))(input) many1(one_of("0123456789"))(input)
} }
fn digit_group_hex(input: &str) -> ParseResult<Vec<char>> { fn digit_group_hex(input: Span) -> ParseResult<Vec<char>> {
many1(one_of("0123456789abcdefABCDEF"))(input) many1(one_of("0123456789abcdefABCDEF"))(input)
} }
fn digit_group_bin(input: &str) -> ParseResult<Vec<char>> { fn digit_group_bin(input: Span) -> ParseResult<Vec<char>> {
many1(one_of("01"))(input) many1(one_of("01"))(input)
} }
@ -252,57 +253,66 @@ mod test {
use super::*; use super::*;
fn rc(s: &str) -> Rc<String> { fn rc(s: &str) -> Rc<String> {
Rc::new(s.to_owned()) Rc::new(s.to_owned())
} }
macro_rules! qn { macro_rules! qn {
( $( $component:ident),* ) => { ( $( $component:ident),* ) => {
{ {
let mut components = vec![]; let mut components = vec![];
$( $(
components.push(rc(stringify!($component))); components.push(rc(stringify!($component)));
)* )*
QualifiedName { components, id: Default::default() } QualifiedName { components, id: Default::default() }
}
};
}
macro_rules! span {
($func:expr, $input:expr) => {
$func(Span::new($input)).map(|(span, x)| (*span.fragment(), x))
};
} }
};
}
#[test] #[test]
fn combinator_test1() { fn combinator_test1() {
assert_eq!(digits(digit_group_dec)("342").unwrap().1, vec!['3', '4', '2']); assert_eq!(span!(digits(digit_group_dec), "342"), Ok(("", vec!['3', '4', '2'])));
assert_eq!(bin_literal("0b1111qsdf"), Ok(("qsdf", 15))); assert_eq!(span!(bin_literal, "0b1111qsdf"), Ok(("qsdf", 15)));
} }
#[test] #[test]
fn combinator_test_ws0() { fn combinator_test_ws0() {
assert_eq!(block_comment("/*yolo*/").unwrap(), ("", ())); assert_eq!(span!(block_comment, "/*yolo*/"), Ok(("", ())));
assert_eq!(block_comment("/*yolo*/ jumpy /*nah*/").unwrap(), (" jumpy /*nah*/", ())); assert_eq!(span!(block_comment, "/*yolo*/ jumpy /*nah*/"), Ok((" jumpy /*nah*/", ())));
assert_eq!(ws0("/* yolo */ ").unwrap(), ("", ())); assert_eq!(span!(ws0, "/* yolo */ "), Ok(("", ())));
assert_eq!(ws0("/* /* no */ yolo */ ").unwrap(), ("", ())); assert_eq!(span!(ws0, "/* /* no */ yolo */ "), Ok(("", ())));
} }
#[test] #[test]
fn combinator_test2() { fn combinator_test2() {
for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() { for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() {
assert_eq!(expression_kind(s).unwrap().1, ExpressionKind::NatLiteral(15)); assert_eq!(span!(expression_kind, s).unwrap().1, ExpressionKind::NatLiteral(15));
} }
assert_eq!(expression_kind(" /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true)); assert_eq!(span!(expression_kind, " /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true));
assert_eq!(expression_kind(" /*yolo*/ barnaby").unwrap().1, ExpressionKind::Value(qn!(barnaby))); assert_eq!(span!(expression_kind, " /*yolo*/ barnaby").unwrap().1, ExpressionKind::Value(qn!(barnaby)));
} }
#[test] #[test]
fn combinator_test3() { fn combinator_test3() {
let source = "{}"; let source = "{}";
assert_eq!(block(source).unwrap().1, vec![].into()); assert_eq!(span!(block, source).unwrap().1, vec![].into());
let source = r#"{ let source = r#"{
//hella //hella
4_5 //bog 4_5 //bog
11; /*chutney*/0xf 11; /*chutney*/0xf
}"#; }"#;
let parsed = block(source).map_err(|err| match err { let parsed = span!(block, source).map_err(|err| match err {
Err::Error(err) | Err::Failure(err) => nom::error::convert_error(source, err), Err::Error(err) | Err::Failure(err) => {
let err = VerboseError { errors: err.errors.into_iter().map(|(sp, kind)| (*sp.fragment(), kind)).collect() };
nom::error::convert_error(source, err)
},
_ => panic!() _ => panic!()
}); });