Using nom parser
This commit is contained in:
parent
e6a9811ee5
commit
1cdaaee9a6
24
Cargo.lock
generated
24
Cargo.lock
generated
@ -397,9 +397,15 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.2.1"
|
version = "2.4.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
|
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "minimal-lexical"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "miniz-sys"
|
name = "miniz-sys"
|
||||||
@ -462,6 +468,17 @@ dependencies = [
|
|||||||
"version_check 0.1.5",
|
"version_check 0.1.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nom"
|
||||||
|
version = "7.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"minimal-lexical",
|
||||||
|
"version_check 0.9.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num"
|
name = "num"
|
||||||
version = "0.1.42"
|
version = "0.1.42"
|
||||||
@ -919,6 +936,7 @@ dependencies = [
|
|||||||
"ena",
|
"ena",
|
||||||
"failure",
|
"failure",
|
||||||
"itertools",
|
"itertools",
|
||||||
|
"nom 7.1.0",
|
||||||
"peg",
|
"peg",
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"radix_trie",
|
"radix_trie",
|
||||||
@ -1068,7 +1086,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "8e51065bafd2abe106b6036483b69d1741f4a1ec56ce8a2378de341637de689e"
|
checksum = "8e51065bafd2abe106b6036483b69d1741f4a1ec56ce8a2378de341637de689e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fnv",
|
"fnv",
|
||||||
"nom",
|
"nom 4.2.3",
|
||||||
"phf",
|
"phf",
|
||||||
"phf_codegen",
|
"phf_codegen",
|
||||||
]
|
]
|
||||||
|
@ -16,6 +16,7 @@ radix_trie = "0.1.5"
|
|||||||
assert_matches = "1.5"
|
assert_matches = "1.5"
|
||||||
#peg = "0.7.0"
|
#peg = "0.7.0"
|
||||||
peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" }
|
peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" }
|
||||||
|
nom = "7.1.0"
|
||||||
|
|
||||||
|
|
||||||
schala-repl = { path = "../schala-repl" }
|
schala-repl = { path = "../schala-repl" }
|
||||||
|
243
schala-lang/src/parsing/combinator.rs
Normal file
243
schala-lang/src/parsing/combinator.rs
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
use nom::{
|
||||||
|
Err,
|
||||||
|
branch::alt,
|
||||||
|
bytes::complete::tag,
|
||||||
|
character::complete::{char, one_of, space0, space1, multispace0, line_ending},
|
||||||
|
combinator::{peek, not, value, map},
|
||||||
|
error::{context, VerboseError, ParseError},
|
||||||
|
multi::{fold_many1, many1, many0, separated_list1},
|
||||||
|
sequence::{tuple, preceded},
|
||||||
|
IResult, Parser,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Result type shared by the parsers in this module: a nom `IResult` over `&str` input whose
/// error type is `VerboseError`.
type ParseResult<'a, O> = IResult<&'a str, O, VerboseError<&'a str>>;
|
||||||
|
|
||||||
|
use crate::ast::*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
fn block(input: &str) -> ParseResult<Block> {
|
||||||
|
context("block",
|
||||||
|
map(
|
||||||
|
tuple((
|
||||||
|
char('{'),
|
||||||
|
value((), context("TOP", many0(alt((line_separator, ws))))),
|
||||||
|
block_items,
|
||||||
|
value((), many0(alt((line_separator, ws)))),
|
||||||
|
char('}'),
|
||||||
|
)), |(_, _, items, _, _)| items.into()))(input)
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
fn tok<'a, O>(input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>) -> impl FnMut(&'a str) -> IResult<&'a str, O, VerboseError<&'a str>> {
|
||||||
|
map(tuple((ws0, input_parser)), |(_, output)| output)
|
||||||
|
}
|
||||||
|
|
||||||
|
// whitespace does consume at least one piece of whitespace - use ws0 for maybe none
|
||||||
|
fn whitespace(input: &str) -> ParseResult<()> {
|
||||||
|
context("whitespace",
|
||||||
|
alt((
|
||||||
|
value((), space1),
|
||||||
|
line_comment,
|
||||||
|
block_comment,
|
||||||
|
)))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ws0(input: &str) -> ParseResult<()> {
|
||||||
|
context("WS0",
|
||||||
|
value((), many0(whitespace)))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn line_comment(input: &str) -> ParseResult<()> {
|
||||||
|
value((),
|
||||||
|
tuple((tag("//"), many0(not(line_ending)), peek(line_ending)))
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn block_comment(input: &str) -> ParseResult<()> {
|
||||||
|
value((),
|
||||||
|
tuple((
|
||||||
|
tag("/*"),
|
||||||
|
many0(alt((
|
||||||
|
block_comment,
|
||||||
|
not(tag("*/"))
|
||||||
|
))),
|
||||||
|
tag("*/")
|
||||||
|
)))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn line_separator(input: &str) -> ParseResult<()> {
|
||||||
|
alt((value((), line_ending), value((), char(';'))))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn block_items(input: &str) -> ParseResult<Vec<Statement>> {
|
||||||
|
context("Block-item",
|
||||||
|
separated_list1(
|
||||||
|
preceded(context("LLLL", ws0), many1(line_separator)),
|
||||||
|
statement,
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn statement(input: &str) -> ParseResult<Statement> {
|
||||||
|
context("Parsing-statement",
|
||||||
|
map(
|
||||||
|
tuple((
|
||||||
|
ws0,
|
||||||
|
expression_kind,
|
||||||
|
ws0
|
||||||
|
)),|(_, kind, _)| Statement {
|
||||||
|
id: Default::default(),
|
||||||
|
location: Default::default(),
|
||||||
|
kind: StatementKind::Expression(Expression::new(Default::default(), kind)),
|
||||||
|
}))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn expression_kind(input: &str) -> ParseResult<ExpressionKind> {
|
||||||
|
context("expression-kind",
|
||||||
|
alt((
|
||||||
|
number_literal,
|
||||||
|
bool_literal,
|
||||||
|
)))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bool_literal(input: &str) -> ParseResult<ExpressionKind> {
|
||||||
|
alt((
|
||||||
|
map(tok(tag("true")), |_| ExpressionKind::BoolLiteral(true)),
|
||||||
|
map(tok(tag("false")), |_| ExpressionKind::BoolLiteral(false)),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn number_literal(input: &str) -> ParseResult<ExpressionKind> {
|
||||||
|
map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dec_literal(input: &str) -> ParseResult<u64> {
|
||||||
|
map(digits(digit_group_dec), |chars: Vec<char>| {
|
||||||
|
let s: String = chars.into_iter().collect();
|
||||||
|
s.parse().unwrap()
|
||||||
|
})(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hex_literal(input: &str) -> ParseResult<u64> {
|
||||||
|
map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec<char>| {
|
||||||
|
let s: String = chars.into_iter().collect();
|
||||||
|
parse_hex(&s).unwrap()
|
||||||
|
})(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bin_literal(input: &str) -> ParseResult<u64> {
|
||||||
|
map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec<char>| {
|
||||||
|
let s: String = chars.into_iter().collect();
|
||||||
|
parse_binary(&s).unwrap()
|
||||||
|
})(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn digits<'a, E: ParseError<&'a str>>(
|
||||||
|
digit_type: impl Parser<&'a str, Vec<char>, E>,
|
||||||
|
) -> impl FnMut(&'a str) -> IResult<&'a str, Vec<char>, E> {
|
||||||
|
map(separated_list1(many1(char('_')), digit_type), |items: Vec<Vec<char>>| {
|
||||||
|
items.into_iter().flatten().collect()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn digit_group_dec(input: &str) -> ParseResult<Vec<char>> {
|
||||||
|
many1(one_of("0123456789"))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn digit_group_hex(input: &str) -> ParseResult<Vec<char>> {
|
||||||
|
many1(one_of("0123456789abcdefABCDEF"))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn digit_group_bin(input: &str) -> ParseResult<Vec<char>> {
|
||||||
|
many1(one_of("01"))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a string of binary digits into a `u64`.
///
/// Underscores are skipped. Returns an error if the value does not fit in a `u64` or if a
/// character other than `0`, `1`, or `_` is encountered. An empty string yields `0`.
fn parse_binary(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        let bit = match d {
            '0' => 0,
            '1' => 1,
            '_' => continue,
            _ => return Err("Internal parser error: invalid binary digit"),
        };
        // Accumulate most-significant digit first so that overflow is detected on the value
        // itself. Tracking a separate place-value multiplier would reject a valid 64-digit
        // literal, because the multiplier overflows right after the final digit.
        result = result
            .checked_mul(2)
            .and_then(|shifted| shifted.checked_add(bit))
            .ok_or("Binary expression will overflow")?;
    }
    Ok(result)
}
|
||||||
|
|
||||||
|
/// Converts a string of hexadecimal digits into a `u64`.
///
/// Underscores are skipped. Returns an error if the value does not fit in a `u64` or if a
/// character that is not a hexadecimal digit is encountered. An empty string yields `0`.
fn parse_hex(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        if d == '_' {
            continue;
        }
        let n = d
            .to_digit(16)
            .ok_or("Internal parser error: invalid hex digit")?;
        // Accumulate most-significant digit first so that overflow is detected on the value
        // itself. Tracking a separate place-value multiplier would reject a valid 16-digit
        // literal, because the multiplier overflows right after the final digit.
        result = result
            .checked_mul(16)
            .and_then(|shifted| shifted.checked_add(u64::from(n)))
            .ok_or("Hexadecimal expression will overflow")?;
    }
    Ok(result)
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Digit groups and a prefixed binary literal parse to the expected values.
    #[test]
    fn combinator_test1() {
        let (_, parsed_digits) = digits(digit_group_dec)("342").unwrap();
        assert_eq!(parsed_digits, vec!['3', '4', '2']);

        assert_eq!(bin_literal("0b1111qsdf"), Ok(("qsdf", 15)));
    }

    /// Several spellings of fifteen all parse to the same natural-number literal.
    #[test]
    fn combinator_test2() {
        let spellings = ["15", "0b1111", "1_5_", "0XF__", "0Xf"];
        for source in spellings.iter() {
            let (_, kind) = expression_kind(source).unwrap();
            assert_eq!(kind, ExpressionKind::NatLiteral(15));
        }
    }

    // Disabled: exercises the `block` parser, which is itself commented out in this module.
    /*
    #[test]
    fn combinator_test3() {
        let source = r#"{

        4_5
        11; 0xf
        }"#;
        let parsed = block(source).map_err(|err| match err {
            Err::Error(err) | Err::Failure(err) => nom::error::convert_error(source, err),
            _ => panic!()
        });
        //let parsed = block(source);

        if let Err(err) = parsed {
            println!("{}", err);
            panic!("parse error desu!");
        }

        assert_eq!(parsed.unwrap().1, vec![
            Statement { id: Default::default(), location:
                Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(),
                ExpressionKind::NatLiteral(45))) },
            Statement { id: Default::default(), location:
                Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(),
                ExpressionKind::NatLiteral(11))) },
            Statement { id: Default::default(), location:
                Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(),
                ExpressionKind::NatLiteral(15))) },
        ].into());
    }
    */
}
|
@ -1,5 +1,6 @@
|
|||||||
#![allow(clippy::upper_case_acronyms)]
|
#![allow(clippy::upper_case_acronyms)]
|
||||||
|
|
||||||
|
mod combinator;
|
||||||
mod peg_parser;
|
mod peg_parser;
|
||||||
mod test;
|
mod test;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user