From a2d5f380a8b18d5a5def5aa95376c6df3c5266e0 Mon Sep 17 00:00:00 2001 From: Greg Shuflin Date: Wed, 17 Nov 2021 01:54:35 -0800 Subject: [PATCH] Continuing work on combinator --- schala-lang/src/parsing/combinator.rs | 159 ++++++++++++++++++-------- schala-lang/src/parsing/mod.rs | 2 +- schala-lang/src/parsing/test.rs | 2 + 3 files changed, 112 insertions(+), 51 deletions(-) diff --git a/schala-lang/src/parsing/combinator.rs b/schala-lang/src/parsing/combinator.rs index 4c7a3f4..396b44a 100644 --- a/schala-lang/src/parsing/combinator.rs +++ b/schala-lang/src/parsing/combinator.rs @@ -1,45 +1,45 @@ use nom::{ Err, branch::alt, - bytes::complete::tag, - character::complete::{char, one_of, space0, space1, multispace0, line_ending}, - combinator::{peek, not, value, map}, + bytes::complete::{take_till, tag}, + character::complete::{alpha1, alphanumeric0, not_line_ending,none_of, char, one_of, space0, space1, multispace0, line_ending}, + combinator::{peek, not, value, map, recognize}, error::{context, VerboseError, ParseError}, - multi::{fold_many1, many1, many0, separated_list1}, + multi::{fold_many1, many1, many0, separated_list1, separated_list0}, sequence::{tuple, preceded}, IResult, Parser, }; + use std::rc::Rc; type ParseResult<'a, O> = IResult<&'a str, O, VerboseError<&'a str>>; use crate::ast::*; -/* -fn block(input: &str) -> ParseResult { - context("block", - map( - tuple(( - char('{'), - value((), context("TOP", many0(alt((line_separator, ws))))), - block_items, - value((), many0(alt((line_separator, ws)))), - char('}'), - )), |(_, _, items, _, _)| items.into()))(input) +fn rc_string(s: &str) -> Rc { + Rc::new(s.to_string()) } -*/ +fn tok<'a, O>(input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>) -> impl FnMut(&'a str) +-> IResult<&'a str, O, VerboseError<&'a str>> { -fn tok<'a, O>(input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>) -> impl FnMut(&'a str) -> IResult<&'a str, O, VerboseError<&'a str>> { - map(tuple((ws0, input_parser)), |(_, output)| output) + context("tok", + map(tuple((ws0, input_parser)), |(_, output)| + output)) } +fn kw<'a>(keyword_str: &'static str) -> impl FnMut(&'a str) -> ParseResult<()> { + context("keyword", + tok(value((), tag(keyword_str)))) +} + + // whitespace does consume at least one piece of whitespace - use ws0 for maybe none fn whitespace(input: &str) -> ParseResult<()> { context("whitespace", alt(( - value((), space1), - line_comment, block_comment, + line_comment, + value((), space1), )))(input) } @@ -50,42 +50,47 @@ fn ws0(input: &str) -> ParseResult<()> { fn line_comment(input: &str) -> ParseResult<()> { value((), - tuple((tag("//"), many0(not(line_ending)), peek(line_ending))) + tuple((tag("//"), not_line_ending)), )(input) } fn block_comment(input: &str) -> ParseResult<()> { + context("Block-comment", value((), tuple(( tag("/*"), many0(alt(( + value((), none_of("*/")), + value((), none_of("/*")), block_comment, - not(tag("*/")) ))), tag("*/") - )))(input) + ))))(input) } -fn line_separator(input: &str) -> ParseResult<()> { - alt((value((), line_ending), value((), char(';'))))(input) +fn statement_delimiter(input: &str) -> ParseResult<()> { + tok(alt(( + value((), line_ending), + value((), char(';')) + )) + )(input) } -fn block_items(input: &str) -> ParseResult> { - context("Block-item", - separated_list1( - preceded(context("LLLL", ws0), many1(line_separator)), - statement, - ))(input) +fn block(input: &str) -> ParseResult { + context("block", + map( + tuple(( + tok(char('{')), + many0(statement_delimiter), + separated_list0(statement_delimiter, statement), + many0(statement_delimiter), + tok(char('}')), + )), |(_, _, items, _, _)| items.into()))(input) } fn statement(input: &str) -> ParseResult { context("Parsing-statement", - map( - tuple(( - ws0, - expression_kind, - ws0 - )),|(_, kind, _)| Statement { + map(expression_kind, |kind| Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(), kind)), @@ -93,18 +98,46 @@ fn statement(input: &str) -> ParseResult { } pub fn expression_kind(input: &str) -> ParseResult { - context("expression-kind", + context("expression-kind", primary_expr)(input) +} + +fn primary_expr(input: &str) -> ParseResult { + + context("primary-expr", alt(( number_literal, bool_literal, + identifier_expr, + )))(input) + +} + +fn identifier_expr(input: &str) -> ParseResult { + context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input) +} + +fn qualified_identifier(input: &str) -> ParseResult { + tok( + map( + separated_list1(tag("::"), map(identifier, rc_string)), + |items| QualifiedName { id: Default::default(), components: items } + ))(input) +} + +fn identifier(input: &str) -> ParseResult<&str> { + recognize( + tuple(( + alt((tag("_"), alpha1)), + alphanumeric0, )))(input) } fn bool_literal(input: &str) -> ParseResult { + context("bool-literal", alt(( - map(tok(tag("true")), |_| ExpressionKind::BoolLiteral(true)), - map(tok(tag("false")), |_| ExpressionKind::BoolLiteral(false)), - ))(input) + map(kw("true"), |_| ExpressionKind::BoolLiteral(true)), + map(kw("false"), |_| ExpressionKind::BoolLiteral(false)), + )))(input) } fn number_literal(input: &str) -> ParseResult { @@ -195,6 +228,21 @@ mod test { use super::*; +fn rc(s: &str) -> Rc { + Rc::new(s.to_owned()) +} +macro_rules! qn { + ( $( $component:ident),* ) => { + { + let mut components = vec![]; + $( + components.push(rc(stringify!($component))); + )* + QualifiedName { components, id: Default::default() } + } + }; +} + #[test] fn combinator_test1() { assert_eq!(digits(digit_group_dec)("342").unwrap().1, vec!['3', '4', '2']); @@ -202,25 +250,37 @@ mod test { } #[test] - fn combinator_test2() { - for s in ["15", "0b1111", "1_5_", "0XF__", "0Xf"].iter() { - assert_eq!(expression_kind(s).unwrap().1, ExpressionKind::NatLiteral(15)); - } + fn combinator_test_ws0() { + assert_eq!(block_comment("/*yolo*/").unwrap(), ("", ())); + assert_eq!(block_comment("/*yolo*/ jumpy /*nah*/").unwrap(), (" jumpy /*nah*/", ())); + assert_eq!(ws0("/* yolo */ ").unwrap(), ("", ())); + assert_eq!(ws0("/* /* no */ yolo */ ").unwrap(), ("", ())); + } + + #[test] + fn combinator_test2() { + for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() { + assert_eq!(expression_kind(s).unwrap().1, ExpressionKind::NatLiteral(15)); + } + + assert_eq!(expression_kind(" /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true)); + assert_eq!(expression_kind(" /*yolo*/ barnaby").unwrap().1, ExpressionKind::Value(qn!(barnaby))); } - /* #[test] fn combinator_test3() { + let source = "{}"; + assert_eq!(block(source).unwrap().1, vec![].into()); let source = r#"{ - 4_5 - 11; 0xf + //hella + 4_5 //bog + 11; /*chutney*/0xf }"#; let parsed = block(source).map_err(|err| match err { Err::Error(err) | Err::Failure(err) => nom::error::convert_error(source, err), _ => panic!() }); - //let parsed = block(source); if let Err(err) = parsed { println!("{}", err); @@ -239,5 +299,4 @@ mod test { ExpressionKind::NatLiteral(15))) }, ].into()); } - */ } diff --git a/schala-lang/src/parsing/mod.rs b/schala-lang/src/parsing/mod.rs index 93d6c11..bfe9371 100644 --- a/schala-lang/src/parsing/mod.rs +++ b/schala-lang/src/parsing/mod.rs @@ -1,6 +1,6 @@ #![allow(clippy::upper_case_acronyms)] -mod combinator; +pub mod combinator; mod peg_parser; mod test; diff --git a/schala-lang/src/parsing/test.rs b/schala-lang/src/parsing/test.rs index 4169e1e..299b129 100644 --- a/schala-lang/src/parsing/test.rs +++ b/schala-lang/src/parsing/test.rs @@ -1352,3 +1352,5 @@ fn backtick_operators() { assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]); } */ + +