Continuing work on combinator

This commit is contained in:
Greg Shuflin 2021-11-17 01:54:35 -08:00
parent 1cdaaee9a6
commit a2d5f380a8
3 changed files with 112 additions and 51 deletions

View File

@ -1,45 +1,45 @@
use nom::{ use nom::{
Err, Err,
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::{take_till, tag},
character::complete::{char, one_of, space0, space1, multispace0, line_ending}, character::complete::{alpha1, alphanumeric0, not_line_ending,none_of, char, one_of, space0, space1, multispace0, line_ending},
combinator::{peek, not, value, map}, combinator::{peek, not, value, map, recognize},
error::{context, VerboseError, ParseError}, error::{context, VerboseError, ParseError},
multi::{fold_many1, many1, many0, separated_list1}, multi::{fold_many1, many1, many0, separated_list1, separated_list0},
sequence::{tuple, preceded}, sequence::{tuple, preceded},
IResult, Parser, IResult, Parser,
}; };
use std::rc::Rc;
type ParseResult<'a, O> = IResult<&'a str, O, VerboseError<&'a str>>; type ParseResult<'a, O> = IResult<&'a str, O, VerboseError<&'a str>>;
use crate::ast::*; use crate::ast::*;
/* fn rc_string(s: &str) -> Rc<String> {
fn block(input: &str) -> ParseResult<Block> { Rc::new(s.to_string())
context("block",
map(
tuple((
char('{'),
value((), context("TOP", many0(alt((line_separator, ws))))),
block_items,
value((), many0(alt((line_separator, ws)))),
char('}'),
)), |(_, _, items, _, _)| items.into()))(input)
} }
*/
fn tok<'a, O>(input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>) -> impl FnMut(&'a str)
-> IResult<&'a str, O, VerboseError<&'a str>> {
fn tok<'a, O>(input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>) -> impl FnMut(&'a str) -> IResult<&'a str, O, VerboseError<&'a str>> { context("tok",
map(tuple((ws0, input_parser)), |(_, output)| output) map(tuple((ws0, input_parser)), |(_, output)|
output))
} }
fn kw<'a>(keyword_str: &'static str) -> impl FnMut(&'a str) -> ParseResult<()> {
context("keyword",
tok(value((), tag(keyword_str))))
}
// whitespace does consume at least one piece of whitespace - use ws0 for maybe none // whitespace does consume at least one piece of whitespace - use ws0 for maybe none
fn whitespace(input: &str) -> ParseResult<()> { fn whitespace(input: &str) -> ParseResult<()> {
context("whitespace", context("whitespace",
alt(( alt((
value((), space1),
line_comment,
block_comment, block_comment,
line_comment,
value((), space1),
)))(input) )))(input)
} }
@ -50,42 +50,47 @@ fn ws0(input: &str) -> ParseResult<()> {
fn line_comment(input: &str) -> ParseResult<()> { fn line_comment(input: &str) -> ParseResult<()> {
value((), value((),
tuple((tag("//"), many0(not(line_ending)), peek(line_ending))) tuple((tag("//"), not_line_ending)),
)(input) )(input)
} }
fn block_comment(input: &str) -> ParseResult<()> { fn block_comment(input: &str) -> ParseResult<()> {
context("Block-comment",
value((), value((),
tuple(( tuple((
tag("/*"), tag("/*"),
many0(alt(( many0(alt((
value((), none_of("*/")),
value((), none_of("/*")),
block_comment, block_comment,
not(tag("*/"))
))), ))),
tag("*/") tag("*/")
)))(input) ))))(input)
} }
fn line_separator(input: &str) -> ParseResult<()> { fn statement_delimiter(input: &str) -> ParseResult<()> {
alt((value((), line_ending), value((), char(';'))))(input) tok(alt((
value((), line_ending),
value((), char(';'))
))
)(input)
} }
fn block_items(input: &str) -> ParseResult<Vec<Statement>> { fn block(input: &str) -> ParseResult<Block> {
context("Block-item", context("block",
separated_list1( map(
preceded(context("LLLL", ws0), many1(line_separator)), tuple((
statement, tok(char('{')),
))(input) many0(statement_delimiter),
separated_list0(statement_delimiter, statement),
many0(statement_delimiter),
tok(char('}')),
)), |(_, _, items, _, _)| items.into()))(input)
} }
fn statement(input: &str) -> ParseResult<Statement> { fn statement(input: &str) -> ParseResult<Statement> {
context("Parsing-statement", context("Parsing-statement",
map( map(expression_kind, |kind| Statement {
tuple((
ws0,
expression_kind,
ws0
)),|(_, kind, _)| Statement {
id: Default::default(), id: Default::default(),
location: Default::default(), location: Default::default(),
kind: StatementKind::Expression(Expression::new(Default::default(), kind)), kind: StatementKind::Expression(Expression::new(Default::default(), kind)),
@ -93,18 +98,46 @@ fn statement(input: &str) -> ParseResult<Statement> {
} }
pub fn expression_kind(input: &str) -> ParseResult<ExpressionKind> { pub fn expression_kind(input: &str) -> ParseResult<ExpressionKind> {
context("expression-kind", context("expression-kind", primary_expr)(input)
}
fn primary_expr(input: &str) -> ParseResult<ExpressionKind> {
context("primary-expr",
alt(( alt((
number_literal, number_literal,
bool_literal, bool_literal,
identifier_expr,
)))(input)
}
fn identifier_expr(input: &str) -> ParseResult<ExpressionKind> {
context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input)
}
fn qualified_identifier(input: &str) -> ParseResult<QualifiedName> {
tok(
map(
separated_list1(tag("::"), map(identifier, rc_string)),
|items| QualifiedName { id: Default::default(), components: items }
))(input)
}
fn identifier(input: &str) -> ParseResult<&str> {
recognize(
tuple((
alt((tag("_"), alpha1)),
alphanumeric0,
)))(input) )))(input)
} }
fn bool_literal(input: &str) -> ParseResult<ExpressionKind> { fn bool_literal(input: &str) -> ParseResult<ExpressionKind> {
context("bool-literal",
alt(( alt((
map(tok(tag("true")), |_| ExpressionKind::BoolLiteral(true)), map(kw("true"), |_| ExpressionKind::BoolLiteral(true)),
map(tok(tag("false")), |_| ExpressionKind::BoolLiteral(false)), map(kw("false"), |_| ExpressionKind::BoolLiteral(false)),
))(input) )))(input)
} }
fn number_literal(input: &str) -> ParseResult<ExpressionKind> { fn number_literal(input: &str) -> ParseResult<ExpressionKind> {
@ -195,32 +228,59 @@ mod test {
use super::*; use super::*;
fn rc(s: &str) -> Rc<String> {
Rc::new(s.to_owned())
}
macro_rules! qn {
( $( $component:ident),* ) => {
{
let mut components = vec![];
$(
components.push(rc(stringify!($component)));
)*
QualifiedName { components, id: Default::default() }
}
};
}
#[test] #[test]
fn combinator_test1() { fn combinator_test1() {
assert_eq!(digits(digit_group_dec)("342").unwrap().1, vec!['3', '4', '2']); assert_eq!(digits(digit_group_dec)("342").unwrap().1, vec!['3', '4', '2']);
assert_eq!(bin_literal("0b1111qsdf"), Ok(("qsdf", 15))); assert_eq!(bin_literal("0b1111qsdf"), Ok(("qsdf", 15)));
} }
#[test]
fn combinator_test_ws0() {
assert_eq!(block_comment("/*yolo*/").unwrap(), ("", ()));
assert_eq!(block_comment("/*yolo*/ jumpy /*nah*/").unwrap(), (" jumpy /*nah*/", ()));
assert_eq!(ws0("/* yolo */ ").unwrap(), ("", ()));
assert_eq!(ws0("/* /* no */ yolo */ ").unwrap(), ("", ()));
}
#[test] #[test]
fn combinator_test2() { fn combinator_test2() {
for s in ["15", "0b1111", "1_5_", "0XF__", "0Xf"].iter() { for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() {
assert_eq!(expression_kind(s).unwrap().1, ExpressionKind::NatLiteral(15)); assert_eq!(expression_kind(s).unwrap().1, ExpressionKind::NatLiteral(15));
} }
assert_eq!(expression_kind(" /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true));
assert_eq!(expression_kind(" /*yolo*/ barnaby").unwrap().1, ExpressionKind::Value(qn!(barnaby)));
} }
/*
#[test] #[test]
fn combinator_test3() { fn combinator_test3() {
let source = "{}";
assert_eq!(block(source).unwrap().1, vec![].into());
let source = r#"{ let source = r#"{
4_5 //hella
11; 0xf 4_5 //bog
11; /*chutney*/0xf
}"#; }"#;
let parsed = block(source).map_err(|err| match err { let parsed = block(source).map_err(|err| match err {
Err::Error(err) | Err::Failure(err) => nom::error::convert_error(source, err), Err::Error(err) | Err::Failure(err) => nom::error::convert_error(source, err),
_ => panic!() _ => panic!()
}); });
//let parsed = block(source);
if let Err(err) = parsed { if let Err(err) = parsed {
println!("{}", err); println!("{}", err);
@ -239,5 +299,4 @@ mod test {
ExpressionKind::NatLiteral(15))) }, ExpressionKind::NatLiteral(15))) },
].into()); ].into());
} }
*/
} }

View File

@ -1,6 +1,6 @@
#![allow(clippy::upper_case_acronyms)] #![allow(clippy::upper_case_acronyms)]
mod combinator; pub mod combinator;
mod peg_parser; mod peg_parser;
mod test; mod test;

View File

@ -1352,3 +1352,5 @@ fn backtick_operators() {
assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]); assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]);
} }
*/ */