2022-10-15 23:36:04 -07:00
|
|
|
#![feature(assert_matches)]
|
|
|
|
#![allow(dead_code)] //TODO eventually turn this off
|
2022-10-16 01:37:51 -07:00
|
|
|
mod bnf;
|
|
|
|
|
|
|
|
use bnf::Bnf;
|
2022-10-16 00:30:06 -07:00
|
|
|
use std::rc::Rc;
|
2022-10-15 23:36:04 -07:00
|
|
|
|
|
|
|
/// Result of running a parser over input of type `I`: on success the produced
/// value paired with the remaining input, `(O, I)`; on failure an error `E`.
type ParseResult<I, O, E> = Result<(O, I), E>;
|
|
|
|
|
|
|
|
trait Parser<I, O, E> {
|
|
|
|
fn parse(&self, input: I) -> ParseResult<I, O, E>;
|
2022-10-16 01:37:51 -07:00
|
|
|
fn bnf(&self) -> Option<Bnf> {
|
|
|
|
None
|
|
|
|
}
|
2022-10-16 00:54:41 -07:00
|
|
|
|
|
|
|
fn map<'a, F, O2>(self, map_fn: F) -> BoxedParser<'a, I, O2, E>
|
|
|
|
where
|
|
|
|
Self: Sized + 'a,
|
|
|
|
I: 'a,
|
|
|
|
E: 'a,
|
|
|
|
O: 'a,
|
|
|
|
O2: 'a,
|
|
|
|
F: Fn(O) -> O2 + 'a,
|
|
|
|
{
|
|
|
|
BoxedParser::new(map(self, map_fn))
|
|
|
|
}
|
2022-10-16 01:10:48 -07:00
|
|
|
|
2022-10-16 01:29:48 -07:00
|
|
|
fn to<'a, O2>(self, item: O2) -> BoxedParser<'a, I, O2, E>
|
|
|
|
where
|
|
|
|
Self: Sized + 'a,
|
|
|
|
I: 'a,
|
|
|
|
O: 'a,
|
|
|
|
O2: Clone + 'a,
|
|
|
|
E: 'a,
|
|
|
|
{
|
|
|
|
self.map(move |_| item.clone())
|
|
|
|
}
|
|
|
|
|
2022-10-16 01:10:48 -07:00
|
|
|
fn then<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, (O, O2), E>
|
|
|
|
where
|
|
|
|
Self: Sized + 'a,
|
|
|
|
I: 'a,
|
|
|
|
O: 'a,
|
|
|
|
O2: 'a,
|
|
|
|
E: 'a,
|
|
|
|
P: Parser<I, O2, E> + 'a,
|
|
|
|
{
|
|
|
|
BoxedParser::new(seq(self, next_parser))
|
|
|
|
}
|
2022-10-15 23:36:04 -07:00
|
|
|
}
|
|
|
|
|
2022-10-16 00:54:41 -07:00
|
|
|
struct BoxedParser<'a, I, O, E> {
|
|
|
|
inner: Box<dyn Parser<I, O, E> + 'a>,
|
2022-10-16 00:44:47 -07:00
|
|
|
}
|
|
|
|
|
2022-10-16 00:54:41 -07:00
|
|
|
impl<'a, I, O, E> BoxedParser<'a, I, O, E> {
|
2022-10-16 00:44:47 -07:00
|
|
|
fn new<P>(inner: P) -> Self
|
|
|
|
where
|
2022-10-16 00:54:41 -07:00
|
|
|
P: Parser<I, O, E> + 'a,
|
2022-10-16 00:44:47 -07:00
|
|
|
{
|
|
|
|
BoxedParser {
|
|
|
|
inner: Box::new(inner),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-16 00:54:41 -07:00
|
|
|
impl<'a, I, O, E> Parser<I, O, E> for BoxedParser<'a, I, O, E> {
|
2022-10-16 00:44:47 -07:00
|
|
|
fn parse(&self, input: I) -> ParseResult<I, O, E> {
|
|
|
|
self.inner.parse(input)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-15 23:41:22 -07:00
|
|
|
impl<I, O, E, F> Parser<I, O, E> for F
|
|
|
|
where
|
|
|
|
F: Fn(I) -> ParseResult<I, O, E>,
|
|
|
|
{
|
2022-10-15 23:36:04 -07:00
|
|
|
fn parse(&self, input: I) -> ParseResult<I, O, E> {
|
|
|
|
self(input)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-16 00:30:06 -07:00
|
|
|
impl<I, O, E, T> Parser<I, O, E> for Rc<T>
|
|
|
|
where
|
|
|
|
T: Parser<I, O, E>,
|
|
|
|
{
|
|
|
|
fn parse(&self, input: I) -> ParseResult<I, O, E> {
|
|
|
|
self.as_ref().parse(input)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-15 23:41:22 -07:00
|
|
|
fn literal(expected: &'static str) -> impl Fn(&str) -> ParseResult<&str, &str, &str> {
|
2022-10-15 23:36:04 -07:00
|
|
|
move |input| match input.get(0..expected.len()) {
|
2022-10-15 23:41:22 -07:00
|
|
|
Some(next) if next == expected => Ok((expected, &input[expected.len()..])),
|
|
|
|
_ => Err(input),
|
2022-10-15 23:36:04 -07:00
|
|
|
}
|
2022-10-10 00:13:39 -07:00
|
|
|
}
|
|
|
|
|
2022-10-15 23:41:22 -07:00
|
|
|
fn map<P, F, I, O1, O2, E>(parser: P, map_fn: F) -> impl Parser<I, O2, E>
|
|
|
|
where
|
|
|
|
P: Parser<I, O1, E>,
|
|
|
|
F: Fn(O1) -> O2,
|
|
|
|
{
|
2022-10-15 23:58:05 -07:00
|
|
|
move |input| {
|
|
|
|
parser
|
|
|
|
.parse(input)
|
|
|
|
.map(|(result, rest)| (map_fn(result), rest))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn seq<P1, P2, I, O1, O2, E>(parser1: P1, parser2: P2) -> impl Parser<I, (O1, O2), E>
|
|
|
|
where
|
|
|
|
P1: Parser<I, O1, E>,
|
|
|
|
P2: Parser<I, O2, E>,
|
|
|
|
{
|
|
|
|
move |input| {
|
|
|
|
parser1.parse(input).and_then(|(result1, rest1)| {
|
|
|
|
parser2
|
|
|
|
.parse(rest1)
|
|
|
|
.map(|(result2, rest2)| ((result1, result2), rest2))
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-16 00:39:41 -07:00
|
|
|
fn pred<P, F, I, O>(parser: P, pred_fn: F) -> impl Parser<I, O, I>
|
|
|
|
where
|
|
|
|
P: Parser<I, O, I>,
|
|
|
|
F: Fn(&O) -> bool,
|
|
|
|
{
|
|
|
|
move |input| {
|
|
|
|
parser.parse(input).and_then(|(result, rest)| {
|
|
|
|
if pred_fn(&result) {
|
|
|
|
Ok((result, rest))
|
|
|
|
} else {
|
|
|
|
Err(rest)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-16 00:30:06 -07:00
|
|
|
fn zero_or_more<P, I, O>(parser: P) -> impl Parser<I, Vec<O>, I>
|
2022-10-16 00:23:01 -07:00
|
|
|
where
|
|
|
|
P: Parser<I, O, I>,
|
|
|
|
I: Copy,
|
|
|
|
{
|
|
|
|
move |mut input| {
|
|
|
|
let mut results = Vec::new();
|
|
|
|
|
|
|
|
while let Ok((item, rest)) = parser.parse(input) {
|
|
|
|
results.push(item);
|
|
|
|
input = rest;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok((results, input))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-16 00:30:06 -07:00
|
|
|
fn one_or_more<P, I, O>(parser: P) -> impl Parser<I, Vec<O>, I>
|
|
|
|
where
|
|
|
|
P: Parser<I, O, I>,
|
|
|
|
I: Copy,
|
|
|
|
{
|
|
|
|
let parser = std::rc::Rc::new(parser);
|
|
|
|
map(
|
|
|
|
seq(parser.clone(), zero_or_more(parser)),
|
|
|
|
|(first, rest)| {
|
|
|
|
let mut output = vec![first];
|
|
|
|
output.extend(rest.into_iter());
|
|
|
|
output
|
|
|
|
},
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2022-10-15 23:58:05 -07:00
|
|
|
/// Parses a standard identifier in a programming language
|
|
|
|
fn identifier(input: &str) -> ParseResult<&str, String, &str> {
|
|
|
|
let mut chars = input.chars();
|
|
|
|
let mut buf = String::new();
|
|
|
|
|
|
|
|
match chars.next() {
|
|
|
|
Some(ch) if ch.is_alphabetic() => buf.push(ch),
|
|
|
|
_ => return Err(input),
|
|
|
|
}
|
|
|
|
|
|
|
|
while let Some(next) = chars.next() {
|
|
|
|
if next.is_alphanumeric() {
|
|
|
|
buf.push(next);
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let next_index = buf.len();
|
|
|
|
Ok((buf, &input[next_index..]))
|
2022-10-15 23:41:22 -07:00
|
|
|
}
|
|
|
|
|
2022-10-16 00:39:41 -07:00
|
|
|
fn any_char(input: &str) -> ParseResult<&str, char, &str> {
|
|
|
|
match input.chars().next() {
|
|
|
|
Some(ch) => Ok((ch, &input[ch.len_utf8()..])),
|
|
|
|
None => Err(input),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-16 01:29:48 -07:00
|
|
|
fn choice<P1, P2, I, O, E>(parser1: P1, parser2: P2) -> impl Parser<I, O, E>
|
|
|
|
where
|
|
|
|
P1: Parser<I, O, E>,
|
|
|
|
P2: Parser<I, O, E>,
|
|
|
|
I: Copy,
|
|
|
|
{
|
|
|
|
move |input| match parser1.parse(input) {
|
|
|
|
ok @ Ok(..) => ok,
|
|
|
|
Err(_e) => parser2.parse(input),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-10 00:13:39 -07:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::assert_matches::assert_matches;
    use std::collections::HashMap;

    /// `literal` yields the matched text and the input that follows it.
    #[test]
    fn test_parsing() {
        let parsed = literal("a")("a yolo");
        assert_matches!(parsed, Ok(("a", " yolo")));
    }

    /// `identifier` accepts letters followed by alphanumerics and rejects a
    /// leading digit, returning the untouched input as the error.
    #[test]
    fn test_identifier() {
        let accepted = identifier("bongo1beans");
        assert_matches!(accepted, Ok((name, "")) if name == "bongo1beans");

        let rejected = identifier("2bongo1beans");
        assert_matches!(rejected, Err("2bongo1beans"));
    }

    /// The `map` method transforms the output and leaves the rest alone.
    #[test]
    fn test_map() {
        let lit_a = literal("a");
        let parsed = lit_a.map(|s| s.to_uppercase()).parse("a yolo");
        assert_matches!(parsed, Ok((upper, " yolo")) if upper == "A");
    }

    /// Sequencing works both through the free function `seq` (nesting to the
    /// right) and through the `then` method (nesting to the left).
    #[test]
    fn test_seq() {
        let nested = seq(identifier, seq(literal(" "), literal("ruts")));
        assert_matches!(
            nested.parse("fort1 ruts"),
            Ok(((name, (" ", "ruts")), "")) if name == "fort1"
        );

        let chained = identifier.then(literal(" ")).then(literal("ruts"));
        assert_matches!(
            chained.parse("fort1 ruts"),
            Ok((((name, " "), "ruts"), "")) if name == "fort1"
        );
    }

    /// `one_or_more` collects every repetition and stops at the first mismatch.
    #[test]
    fn test_one_or_more() {
        let bongos = one_or_more(literal("bongo "));

        assert_matches!(
            bongos.parse("bongo bongo bongo bongo bongo "),
            Ok((items, "")) if items.len() == 5
        );
        assert_matches!(
            bongos.parse("bongo ecks"),
            Ok((items, "ecks")) if items.len() == 1
        );
    }

    /// `pred` lets a value through when the predicate accepts it.
    #[test]
    fn test_pred() {
        let only_f = pred(any_char, |c| *c == 'f');
        assert_eq!(only_f.parse("frog"), Ok(('f', "rog")));
    }

    /// `choice` returns the result of the first alternative that matches.
    #[test]
    fn test_choice() {
        let word = literal("gnostika").to(1);
        let spaces = one_or_more(literal(" ")).to(2);
        let either = choice(word, spaces);
        assert_eq!(either.parse("gnostika twentynine"), Ok((1, " twentynine")));
    }

    /*
     * JSON BNF
     *
     * <JSON>     ::= <value>
     * <value>    ::= <object> | <array> | <boolean> | <string> | <number> | <null>
     * <array>    ::= "[" [<value>] {"," <value>}* "]"
     * <object>   ::= "{" [<property>] {"," <property>}* "}"
     * <property> ::= <string> ":" <value>
     */

    /// In-memory representation of a JSON value, used as the target of the
    /// JSON parsing test below.
    #[derive(Debug, Clone)]
    enum JsonValue {
        /// The literal `null`.
        Null,
        /// The literals `true` and `false`.
        Bool(bool),
        /// A string value.
        Str(String),
        /// A numeric value.
        Num(f64),
        /// An ordered list of values.
        Array(Vec<JsonValue>),
        /// A mapping from property names to values.
        Object(HashMap<String, JsonValue>),
    }

    /// Parses the keyword subset of JSON (`null`, `true`, `false`) by
    /// combining `literal`, `to` and `choice`.
    #[test]
    fn parse_json() {
        let null = literal("null").to(JsonValue::Null);
        let yes = literal("true").to(JsonValue::Bool(true));
        let no = literal("false").to(JsonValue::Bool(false));

        let json_value = choice(null, choice(yes, no));

        assert_matches!(json_value.parse("true"), Ok((JsonValue::Bool(true), "")));
    }
}
|