2021-11-17 12:45:55 -08:00
|
|
|
use std::{cell::RefCell, rc::Rc};
|
|
|
|
|
2021-11-16 20:23:27 -08:00
|
|
|
use nom::{
|
|
|
|
branch::alt,
|
2021-11-17 12:45:55 -08:00
|
|
|
bytes::complete::{tag, take_till},
|
|
|
|
character::complete::{
|
|
|
|
alpha1, alphanumeric0, char, line_ending, multispace0, none_of, not_line_ending, one_of, space0,
|
|
|
|
space1,
|
|
|
|
},
|
|
|
|
combinator::{map, not, opt, peek, recognize, value},
|
|
|
|
error::{context, ParseError, VerboseError},
|
|
|
|
multi::{fold_many1, many0, many1, separated_list0, separated_list1},
|
|
|
|
sequence::{pair, preceded, tuple},
|
|
|
|
Err, IResult, Parser,
|
2021-11-16 20:23:27 -08:00
|
|
|
};
|
2021-11-17 03:59:16 -08:00
|
|
|
use nom_locate::{position, LocatedSpan};
|
2021-11-17 12:45:55 -08:00
|
|
|
|
2021-11-17 12:45:42 -08:00
|
|
|
use crate::identifier::{Id, IdStore};
|
2021-11-16 20:23:27 -08:00
|
|
|
|
2021-11-17 12:45:42 -08:00
|
|
|
/// Shared, interior-mutable handle to the `IdStore` that hands out ids for AST items.
type StoreRef = Rc<RefCell<IdStore<ASTItem>>>;

/// Input type of every parser in this file: a located `&str` slice that carries the
/// [`StoreRef`] as its `extra` payload.
type Span<'a> = LocatedSpan<&'a str, StoreRef>;

/// Result type of every parser in this file: a nom `IResult` over [`Span`] whose errors
/// are collected in a `VerboseError`.
type ParseResult<'a, O> = IResult<Span<'a>, O, VerboseError<Span<'a>>>;
|
2021-11-16 20:23:27 -08:00
|
|
|
|
|
|
|
use crate::ast::*;
|
|
|
|
|
2021-11-17 01:54:35 -08:00
|
|
|
/// Copies `s` into a freshly allocated, reference-counted `String`.
fn rc_string(s: &str) -> Rc<String> {
    let owned = String::from(s);
    Rc::new(owned)
}
|
2021-11-17 12:45:42 -08:00
|
|
|
/// Allocates a new AST item id from the id store carried in `span.extra`.
///
/// The store is mutably borrowed through its `RefCell` only for the duration of the call.
fn fresh_id(span: &Span) -> Id<ASTItem> {
    span.extra.borrow_mut().fresh()
}
|
2021-11-16 20:23:27 -08:00
|
|
|
|
2021-11-17 12:45:55 -08:00
|
|
|
fn tok<'a, O>(
|
|
|
|
input_parser: impl Parser<Span<'a>, O, VerboseError<Span<'a>>>,
|
|
|
|
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, VerboseError<Span<'a>>> {
|
|
|
|
context("tok", map(tuple((ws0, input_parser)), |(_, output)| output))
|
2021-11-17 01:54:35 -08:00
|
|
|
}
|
2021-11-16 20:23:27 -08:00
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn kw<'a>(keyword_str: &'static str) -> impl FnMut(Span<'a>) -> ParseResult<()> {
|
2021-11-17 12:45:55 -08:00
|
|
|
context("keyword", tok(value((), tag(keyword_str))))
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
// whitespace does consume at least one piece of whitespace - use ws0 for maybe none
|
2021-11-17 03:59:16 -08:00
|
|
|
fn whitespace(input: Span) -> ParseResult<()> {
|
2021-11-17 12:45:55 -08:00
|
|
|
context("whitespace", alt((block_comment, line_comment, value((), space1))))(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn ws0(input: Span) -> ParseResult<()> {
|
2021-11-17 12:45:55 -08:00
|
|
|
context("WS0", value((), many0(whitespace)))(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn line_comment(input: Span) -> ParseResult<()> {
|
2021-11-17 12:45:55 -08:00
|
|
|
value((), tuple((tag("//"), not_line_ending)))(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn block_comment(input: Span) -> ParseResult<()> {
|
2021-11-17 12:45:55 -08:00
|
|
|
context(
|
|
|
|
"Block-comment",
|
|
|
|
value(
|
|
|
|
(),
|
|
|
|
tuple((
|
|
|
|
tag("/*"),
|
|
|
|
many0(alt((value((), none_of("*/")), value((), none_of("/*")), block_comment))),
|
|
|
|
tag("*/"),
|
|
|
|
)),
|
|
|
|
),
|
|
|
|
)(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn statement_delimiter(input: Span) -> ParseResult<()> {
|
2021-11-17 12:45:55 -08:00
|
|
|
tok(alt((value((), line_ending), value((), char(';')))))(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn block(input: Span) -> ParseResult<Block> {
|
2021-11-17 12:45:55 -08:00
|
|
|
context(
|
|
|
|
"block",
|
|
|
|
map(
|
|
|
|
tuple((
|
|
|
|
tok(char('{')),
|
|
|
|
many0(statement_delimiter),
|
|
|
|
separated_list0(statement_delimiter, statement),
|
|
|
|
many0(statement_delimiter),
|
|
|
|
tok(char('}')),
|
|
|
|
)),
|
|
|
|
|(_, _, items, _, _)| items.into(),
|
|
|
|
),
|
|
|
|
)(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn statement(input: Span) -> ParseResult<Statement> {
|
2021-11-17 12:45:42 -08:00
|
|
|
let (input, pos) = position(input)?;
|
|
|
|
let pos: usize = pos.location_offset();
|
|
|
|
let id = fresh_id(&input);
|
2021-11-17 12:45:55 -08:00
|
|
|
context(
|
|
|
|
"Parsing-statement",
|
|
|
|
map(expression, move |expr| Statement {
|
|
|
|
id,
|
|
|
|
location: pos.into(),
|
|
|
|
kind: StatementKind::Expression(expr),
|
|
|
|
}),
|
|
|
|
)(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn expression(input: Span) -> ParseResult<Expression> {
|
2021-11-17 12:45:42 -08:00
|
|
|
let id = fresh_id(&input);
|
2021-11-17 12:45:55 -08:00
|
|
|
map(pair(expression_kind, opt(type_anno)), move |(kind, maybe_anno)| Expression::new(id, kind))(input)
|
2021-11-17 03:40:43 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn type_anno(input: Span) -> ParseResult<TypeIdentifier> {
|
2021-11-17 03:40:43 -08:00
|
|
|
preceded(kw(":"), type_identifier)(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
/// Parses a type identifier.
///
/// Not implemented yet: calling this parser panics via `unimplemented!()`.
fn type_identifier(input: Span) -> ParseResult<TypeIdentifier> {
    // Sketch of the intended parser, kept from the original draft:
    //
    //   alt((
    //       tuple((kw("("), separated_list0(kw(","), type_identifier), kw(")"))),
    //       type_singleton_name
    //   ))(input)
    //
    // NOTE(review): presumably both branches still need mapping into `TypeIdentifier`.
    unimplemented!()
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
/// Parses a singleton type name.
///
/// Not implemented yet: calling this parser panics via `unimplemented!()`.
fn type_singleton_name(input: Span) -> ParseResult<TypeSingletonName> {
    unimplemented!()
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
pub fn expression_kind(input: Span) -> ParseResult<ExpressionKind> {
|
2021-11-17 01:54:35 -08:00
|
|
|
context("expression-kind", primary_expr)(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn primary_expr(input: Span) -> ParseResult<ExpressionKind> {
|
2021-11-17 12:45:55 -08:00
|
|
|
context("primary-expr", alt((number_literal, bool_literal, identifier_expr)))(input)
|
2021-11-17 01:54:35 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn identifier_expr(input: Span) -> ParseResult<ExpressionKind> {
|
2021-11-17 01:54:35 -08:00
|
|
|
context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn qualified_identifier(input: Span) -> ParseResult<QualifiedName> {
|
2021-11-17 12:45:42 -08:00
|
|
|
let id = fresh_id(&input);
|
2021-11-17 12:45:55 -08:00
|
|
|
tok(map(separated_list1(tag("::"), map(identifier, |x| rc_string(x.fragment()))), move |items| {
|
|
|
|
QualifiedName { id, components: items }
|
|
|
|
}))(input)
|
2021-11-17 01:54:35 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn identifier(input: Span) -> ParseResult<Span> {
|
2021-11-17 12:45:55 -08:00
|
|
|
recognize(tuple((alt((tag("_"), alpha1)), alphanumeric0)))(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn bool_literal(input: Span) -> ParseResult<ExpressionKind> {
|
2021-11-17 12:45:55 -08:00
|
|
|
context(
|
|
|
|
"bool-literal",
|
|
|
|
alt((
|
|
|
|
map(kw("true"), |_| ExpressionKind::BoolLiteral(true)),
|
|
|
|
map(kw("false"), |_| ExpressionKind::BoolLiteral(false)),
|
|
|
|
)),
|
|
|
|
)(input)
|
2021-11-16 20:23:27 -08:00
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn number_literal(input: Span) -> ParseResult<ExpressionKind> {
|
2021-11-16 20:23:27 -08:00
|
|
|
map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn dec_literal(input: Span) -> ParseResult<u64> {
|
2021-11-16 20:23:27 -08:00
|
|
|
map(digits(digit_group_dec), |chars: Vec<char>| {
|
|
|
|
let s: String = chars.into_iter().collect();
|
|
|
|
s.parse().unwrap()
|
|
|
|
})(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn hex_literal(input: Span) -> ParseResult<u64> {
|
2021-11-16 20:23:27 -08:00
|
|
|
map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec<char>| {
|
|
|
|
let s: String = chars.into_iter().collect();
|
|
|
|
parse_hex(&s).unwrap()
|
|
|
|
})(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn bin_literal(input: Span) -> ParseResult<u64> {
|
2021-11-16 20:23:27 -08:00
|
|
|
map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec<char>| {
|
|
|
|
let s: String = chars.into_iter().collect();
|
|
|
|
parse_binary(&s).unwrap()
|
|
|
|
})(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn digits<'a, E: ParseError<Span<'a>>>(
|
|
|
|
digit_type: impl Parser<Span<'a>, Vec<char>, E>,
|
|
|
|
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Vec<char>, E> {
|
2021-11-16 20:23:27 -08:00
|
|
|
map(separated_list1(many1(char('_')), digit_type), |items: Vec<Vec<char>>| {
|
|
|
|
items.into_iter().flatten().collect()
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn digit_group_dec(input: Span) -> ParseResult<Vec<char>> {
|
2021-11-16 20:23:27 -08:00
|
|
|
many1(one_of("0123456789"))(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn digit_group_hex(input: Span) -> ParseResult<Vec<char>> {
|
2021-11-16 20:23:27 -08:00
|
|
|
many1(one_of("0123456789abcdefABCDEF"))(input)
|
|
|
|
}
|
|
|
|
|
2021-11-17 03:59:16 -08:00
|
|
|
fn digit_group_bin(input: Span) -> ParseResult<Vec<char>> {
|
2021-11-16 20:23:27 -08:00
|
|
|
many1(one_of("01"))(input)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Converts a string of binary digits into a `u64`.
///
/// Underscores are ignored. An empty string yields `0`.
///
/// # Errors
///
/// Returns `Err` when the value does not fit in 64 bits, or when `digits` contains a
/// character other than `0`, `1`, or `_`. Leading zeros never cause an overflow error,
/// and every value up to `u64::MAX` (64 significant digits) is accepted.
fn parse_binary(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        let bit = match d {
            '0' => 0,
            '1' => 1,
            '_' => continue,
            _ => return Err("Internal parser error: invalid binary digit"),
        };
        // If the top bit is already set, shifting would discard a significant digit.
        if result >> 63 != 0 {
            return Err("Binary expression will overflow");
        }
        result = (result << 1) | bit;
    }
    Ok(result)
}
|
|
|
|
|
|
|
|
/// Converts a string of hexadecimal digits into a `u64`.
///
/// Underscores are ignored and both letter cases are accepted. An empty string
/// yields `0`.
///
/// # Errors
///
/// Returns `Err` when the value does not fit in 64 bits, or when `digits` contains a
/// character that is neither a hex digit nor `_`. Leading zeros never cause an overflow
/// error, and every value up to `u64::MAX` (16 significant digits) is accepted.
fn parse_hex(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        if d == '_' {
            continue;
        }
        let n = d.to_digit(16).ok_or("Internal parser error: invalid hex digit")?;
        // After multiplying by 16 the low nibble is zero, so OR-ing the digit in cannot
        // overflow; only the multiplication needs checking.
        let shifted = result.checked_mul(16).ok_or("Hexadecimal expression will overflow")?;
        result = shifted | u64::from(n);
    }
    Ok(result)
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Shorthand for the `Rc<String>` components of a qualified name.
    fn rc(s: &str) -> Rc<String> {
        Rc::new(String::from(s))
    }

    /// Builds a `QualifiedName` with a default id from a comma-separated list of
    /// identifiers.
    macro_rules! qn {
        ( $( $component:ident),* ) => {
            QualifiedName {
                components: vec![ $( rc(stringify!($component)) ),* ],
                id: Default::default(),
            }
        };
    }

    /// Runs parser `$func` on `$input` with a brand-new id store and returns the
    /// result with the remaining input reduced to a plain `&str`.
    macro_rules! span {
        ($func:expr, $input:expr) => {{
            let store: StoreRef = Rc::new(RefCell::new(IdStore::new()));
            let located = Span::new_extra($input, store);
            $func(located).map(|(rest, parsed)| (*rest.fragment(), parsed))
        }};
    }

    #[test]
    fn combinator_test1() {
        let digit_run = span!(digits(digit_group_dec), "342");
        assert_eq!(digit_run, Ok(("", vec!['3', '4', '2'])));

        let binary = span!(bin_literal, "0b1111qsdf");
        assert_eq!(binary, Ok(("qsdf", 15)));
    }

    #[test]
    fn combinator_test_ws0() {
        assert_eq!(span!(block_comment, "/*yolo*/"), Ok(("", ())));
        assert_eq!(span!(block_comment, "/*yolo*/ jumpy /*nah*/"), Ok((" jumpy /*nah*/", ())));
        assert_eq!(span!(ws0, "/* yolo */ "), Ok(("", ())));
        assert_eq!(span!(ws0, "/* /* no */ yolo */ "), Ok(("", ())));
    }

    #[test]
    fn combinator_test2() {
        // Every spelling below must parse to the natural number fifteen.
        let spellings_of_fifteen = [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"];
        for s in spellings_of_fifteen.iter() {
            assert_eq!(span!(expression_kind, s).unwrap().1, ExpressionKind::NatLiteral(15));
        }

        let (_, boolean) = span!(expression_kind, " /*gay*/ true").unwrap();
        assert_eq!(boolean, ExpressionKind::BoolLiteral(true));

        let (_, name) = span!(expression_kind, " /*yolo*/ barnaby").unwrap();
        assert_eq!(name, ExpressionKind::Value(qn!(barnaby)));
    }

    #[test]
    fn combinator_test3() {
        let source = "{}";
        assert_eq!(span!(block, source).unwrap().1, vec![].into());

        let source = r#"{

//hella
4_5 //bog
11; /*chutney*/0xf
}"#;

        // Render parse errors against the plain source text so they are readable.
        let parsed = span!(block, source).map_err(|err| match err {
            Err::Error(err) | Err::Failure(err) => {
                let err = VerboseError {
                    errors: err.errors.into_iter().map(|(sp, kind)| (*sp.fragment(), kind)).collect(),
                };
                nom::error::convert_error(source, err)
            }
            Err::Incomplete(_) => panic!(),
        });

        let (_, statements) = match parsed {
            Ok(success) => success,
            Err(rendered) => {
                println!("{}", rendered);
                panic!("parse error desu!");
            }
        };

        // An expression statement holding a natural-number literal, with default
        // ids and location.
        let nat = |n| Statement {
            id: Default::default(),
            location: Default::default(),
            kind: StatementKind::Expression(Expression::new(
                Default::default(),
                ExpressionKind::NatLiteral(n),
            )),
        };

        assert_eq!(statements, vec![nat(45), nat(11), nat(15)].into());
    }
}
|