13 Commits

Author SHA1 Message Date
greg
5fcb3b4f3b combine stuff - doesn't work 2020-02-22 20:21:15 -08:00
greg
1f4ea71cf9 More combine 2020-02-20 02:50:18 -08:00
greg
cd49c2c78f Starting to experiment with combine wip 2020-02-19 04:50:00 -08:00
greg
b1ffcd709b Use VerboseError 2020-02-14 16:43:28 -08:00
greg
84455d11d5 Add TODO note 2020-02-14 16:43:21 -08:00
greg
16559d2e55 Type alias 2020-02-14 10:20:27 -08:00
greg
43cad55735 Expand parser more 2020-02-14 02:55:45 -08:00
greg
a6d065864c A lot of stuff around identifiers 2020-02-14 01:50:24 -08:00
greg
e2fc454c82 Make a bunch of things more concise 2020-02-13 03:11:46 -08:00
greg
54649246b0 Bugfix 2020-02-13 03:01:48 -08:00
greg
6759640389 Fix whitespace 2020-02-13 02:57:30 -08:00
greg
c6b0f7d7d1 Pratt parsing 2020-02-13 02:48:38 -08:00
greg
b7f7ba57d7 Added compiling nom-based parser beginnings 2020-02-13 00:38:10 -08:00
13 changed files with 690 additions and 352 deletions

754
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,8 @@
# Plan of attack
-ONLY two types of statement, Expression and Declaration
-modules and imports are just types of Declarables
1. modify visitor so it can handle scopes
-this is needed both to handle import scope correctly
-and also to support making FQSNs aware of function parameters

View File

@@ -5,7 +5,7 @@ authors = ["greg <greg.shuflin@protonmail.com>"]
edition = "2018"
[dependencies]
itertools = "0.10"
itertools = "0.8.0"
take_mut = "0.2.2"
maplit = "1.0.1"
lazy_static = "1.3.0"
@@ -15,6 +15,8 @@ stopwatch = "0.0.7"
derivative = "1.0.3"
colored = "1.8"
radix_trie = "0.1.5"
nom = "5.1.0"
combine = "4.0.1"
schala-lang-codegen = { path = "../codegen" }
schala-repl = { path = "../../schala-repl" }

View File

@@ -17,7 +17,7 @@ pub struct ItemId {
}
impl ItemId {
fn new(n: u32) -> ItemId {
pub fn new(n: u32) -> ItemId {
ItemId { idx: n }
}
}

View File

@@ -68,6 +68,10 @@ impl BinOp {
let s = token_kind_to_sigil(op_tok)?;
Some(binop_precedences(s))
}
pub fn precedence(&self) -> i32 {
binop_precedences(self.sigil.as_str())
}
}
fn token_kind_to_sigil<'a>(tok: &'a TokenKind) -> Option<&'a str> {

View File

@@ -2,6 +2,8 @@ use std::rc::Rc;
use std::fmt::Write;
use std::io;
use itertools::Itertools;
use crate::schala::SymbolTableHandle;
use crate::util::ScopeStack;
use crate::reduced_ast::{BoundVars, ReducedAST, Stmt, Expr, Lit, Func, Alternative, Subpattern};

View File

@@ -1,6 +1,6 @@
#![feature(trace_macros)]
//#![feature(unrestricted_attribute_tokens)]
#![feature(box_patterns, box_syntax)]
#![feature(slice_patterns, box_patterns, box_syntax)]
//! `schala-lang` is where the Schala programming language is actually implemented.
//! It defines the `Schala` type, which contains the state for a Schala REPL, and implements
@@ -32,6 +32,7 @@ mod debugging;
mod tokenizing;
mod ast;
mod parser;
mod parsing;
#[macro_use]
mod symbol_table;

View File

@@ -0,0 +1,255 @@
extern crate nom;
use std::rc::Rc;
use std::str::FromStr;
use nom::IResult;
use nom::character::complete::{one_of, space0, alphanumeric0};
use nom::bytes::complete::{tag, take, take_while, take_until};
use nom::combinator::{map, map_res, value, opt, verify};
use nom::multi::{separated_list, separated_nonempty_list, many1, many0};
use nom::error::{context, VerboseError};
use nom::branch::alt;
use nom::sequence::{pair, delimited};
use crate::ast::*;
use crate::builtin::Builtin;
/// Result type returned by every nom-based parser in this file: the input is a
/// `&str`, the parsed value is `T`, and failures are reported as a
/// `VerboseError` over the same input type.
type ParseResult<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;
/// Consumes exactly one character from `text` and yields it, provided that
/// character is alphabetic; otherwise the parser fails without consuming input.
fn single_alphabetic_character(text: &str) -> ParseResult<char> {
    let one_alphabetic = verify(take(1usize), |chunk: &str| {
        chunk.chars().next().map_or(false, |c| c.is_alphabetic())
    });
    // `verify` has already guaranteed that the chunk holds a character.
    map(one_alphabetic, |chunk: &str| chunk.chars().next().unwrap())(text)
}
/// Consumes exactly one character from `text` and yields it, provided that
/// character is alphanumeric or an underscore (despite the name, `_` is
/// accepted as well).
fn single_alphanumeric_character(text: &str) -> ParseResult<char> {
    let one_word_char = verify(take(1usize), |chunk: &str| {
        chunk
            .chars()
            .next()
            .map_or(false, |c| c == '_' || c.is_alphanumeric())
    });
    // `verify` has already guaranteed that the chunk holds a character.
    map(one_word_char, |chunk: &str| chunk.chars().next().unwrap())(text)
}
/// Parses an identifier and returns it as a shared string.
///
/// Two shapes are accepted, tried in this order:
/// 1. an underscore followed by at least one alphanumeric-or-underscore character;
/// 2. an alphabetic character followed by zero or more alphanumeric-or-underscore
///    characters.
fn identifier(text: &str) -> ParseResult<Rc<String>> {
    use nom::character::complete::char;

    let underscore_led = pair(char('_'), many1(single_alphanumeric_character));
    let letter_led = pair(
        single_alphabetic_character,
        many0(single_alphanumeric_character),
    );
    // Glue the leading character back onto the rest of the name.
    let assemble = |(head, tail): (char, Vec<char>)| {
        Rc::new(std::iter::once(head).chain(tail).collect::<String>())
    };

    context("Identifier", map(alt((underscore_led, letter_led)), assemble))(text)
}
/// The set of characters that may appear in a binary operator sigil.
const OPERATOR_CHARS: &str = "~`!@#$%^&*-+=<>?/|";
/// Parses a run of one or more characters from `OPERATOR_CHARS` and converts
/// the resulting sigil into a `BinOp` via `BinOp::from_sigil`.
fn parse_binop(text: &str) -> ParseResult<BinOp> {
    let sigil = map(many1(one_of(OPERATOR_CHARS)), |chars: Vec<char>| {
        chars.into_iter().collect::<String>()
    });
    context("Binop", map(sigil, |s: String| BinOp::from_sigil(&s)))(text)
}
/// Parses the text `true` or `false` into an `ExpressionKind::BoolLiteral`.
fn parse_bool_literal(text: &str) -> ParseResult<ExpressionKind> {
    let truthy = value(true, tag("true"));
    let falsy = value(false, tag("false"));
    context(
        "Bool literal",
        map(alt((truthy, falsy)), ExpressionKind::BoolLiteral),
    )(text)
}
/// Parses a decimal natural-number literal into an `ExpressionKind::NatLiteral`.
///
/// The literal is a run of one or more decimal digits and underscores; the
/// underscores are discarded before the remaining digits are converted to a
/// `u64`. The parser fails if that conversion fails (for example when the run
/// contains only underscores, or the value does not fit in a `u64`).
fn parse_number_literal(text: &str) -> ParseResult<ExpressionKind> {
    use nom::character::complete::char;

    let digit = map(one_of("1234567890"), Some);
    let separator = map(char('_'), |_| None);
    // Drop the `None` placeholders left by underscores, then parse the digits.
    let digits_to_nat = |pieces: Vec<Option<char>>| {
        pieces
            .into_iter()
            .flatten()
            .collect::<String>()
            .parse::<u64>()
    };
    let natural = map_res(many1(alt((digit, separator))), digits_to_nat);

    let (text, n) = context("Number literal", natural)(text)?;
    Ok((text, ExpressionKind::NatLiteral(n)))
}
fn parse_binary_literal(input: &str) -> ParseResult<ExpressionKind> {
let (rest, _) = tag("0b")(input)?;
let (rest, n): (&str, u64) = map_res(
take_while(|c: char| c == '0' || c == '1'),
|hex_str: &str| u64::from_str_radix(hex_str, 2)
)(rest)?;
let expr = ExpressionKind::NatLiteral(n);
Ok((rest, expr))
}
fn parse_hex_literal(input: &str) -> ParseResult<ExpressionKind> {
let (rest, _) = tag("0x")(input)?;
let (rest, n): (&str, u64) = map_res(
take_while(|c: char| c.is_digit(16)),
|hex_str: &str| u64::from_str_radix(hex_str, 16)
)(rest)?;
let expr = ExpressionKind::NatLiteral(n);
Ok((rest, expr))
}
/// Parses a double-quoted string into an `ExpressionKind::StringLiteral`.
///
/// The contents are everything up to the next `"` character; no escape
/// sequences are interpreted.
fn parse_string_literal(text: &str) -> ParseResult<ExpressionKind> {
    use nom::character::complete::char;

    let quoted = delimited(char('"'), take_until("\""), char('"'));
    let to_literal =
        |contents: &str| ExpressionKind::StringLiteral(Rc::new(contents.to_string()));
    context("String literal", map(quoted, to_literal))(text)
}
/// Parses any literal: hex, binary, decimal number, boolean or string.
///
/// The alternatives are tried in that order. The prefixed forms come before
/// the plain number parser, which would otherwise accept the leading `0` of
/// `0x…`/`0b…` on its own.
fn literal(input: &str) -> ParseResult<ExpressionKind> {
    let any_literal = alt((
        parse_hex_literal,
        parse_binary_literal,
        parse_number_literal,
        parse_bool_literal,
        parse_string_literal,
    ));
    context("Literal", any_literal)(input)
}
/// Parses an expression wrapped in parentheses and yields the inner
/// expression's kind.
fn paren_expr(text: &str) -> ParseResult<ExpressionKind> {
    use nom::character::complete::char;

    let open = char('(');
    let close = char(')');
    context("Paren expression", delimited(open, expression_kind, close))(text)
}
/// Parses a single prefix-operator character (`+`, `-` or `!`) into a
/// `PrefixOp`.
///
/// Panics if `PrefixOp::from_str` rejects one of those three sigils.
fn prefix_op(input: &str) -> ParseResult<PrefixOp> {
    use nom::character::complete::char;

    let sigil = alt((char('+'), char('-'), char('!')));
    let to_op = |c| {
        let text = c.to_string();
        PrefixOp::from_str(&text).unwrap()
    };
    map(sigil, to_op)(input)
}
fn identifier_expr(text: &str) -> ParseResult<ExpressionKind> {
let (text, qualified_identifier) = map(
qualified_identifier_list,
|components| QualifiedName { id: ItemId::new(0), components }
)(text)?;
//TODO handle struct literals
let exp = Expression::new(ItemId::new(0), ExpressionKind::Value(qualified_identifier));
Ok((text, exp.kind))
}
/// Parses one or more identifiers separated by `::` and returns them in order.
fn qualified_identifier_list(text: &str) -> ParseResult<Vec<Rc<String>>> {
    let components = separated_nonempty_list(tag("::"), identifier);
    context("Qualified identifier list", components)(text)
}
/// Parses a primary expression.
///
/// Target grammar:
/// primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
///
/// Only `literal`, `paren_expr` and `identifier_expr` are handled here so far,
/// tried in that order.
fn primary_expr(text: &str) -> ParseResult<ExpressionKind> {
    let alternatives = (literal, paren_expr, identifier_expr);
    alt(alternatives)(text)
}
/// Parses one argument of a call.
///
/// An underscore followed by zero or more alphanumeric characters yields
/// `InvocationArgument::Ignored`; anything else is parsed as an expression and
/// wrapped in `InvocationArgument::Positional` (with placeholder item id 0 and
/// no type annotation).
fn invocation_argument(text: &str) -> ParseResult<InvocationArgument> {
    use nom::character::complete::char;

    let ignored = value(
        InvocationArgument::Ignored,
        pair(char('_'), alphanumeric0),
    );
    let positional = map(expression_kind, |kind: ExpressionKind| {
        let expression = Expression {
            id: ItemId::new(0),
            kind,
            type_anno: None,
        };
        InvocationArgument::Positional(expression)
    });
    alt((ignored, positional))(text)
}
/// Parses a primary expression optionally followed by a parenthesised,
/// comma-separated argument list.
///
/// With an argument list the result is an `ExpressionKind::Call` whose callee
/// is the primary expression; without one the primary expression is returned
/// unchanged.
fn call_expr(text: &str) -> ParseResult<ExpressionKind> {
    use nom::character::complete::char;

    let (text, callee) = primary_expr(text)?;
    let argument_list = delimited(
        char('('),
        separated_list(char(','), invocation_argument),
        char(')'),
    );
    let (text, maybe_arguments) = opt(argument_list)(text)?;

    let kind = match maybe_arguments {
        Some(arguments) => {
            let f = bx!(Expression { id: ItemId::new(0), kind: callee, type_anno: None });
            ExpressionKind::Call { f, arguments }
        }
        None => callee,
    };
    Ok((text, kind))
}
/// Parses an optional prefix operator (with optional surrounding spaces)
/// followed by a call expression.
///
/// When a prefix operator is present the operand is wrapped in an
/// `ExpressionKind::PrefixExp`; otherwise the call expression is returned as is.
fn prefix_expr(text: &str) -> ParseResult<ExpressionKind> {
    let (text, maybe_op) = delimited(space0, opt(prefix_op), space0)(text)?;
    let (text, operand) = call_expr(text)?;

    let kind = if let Some(op) = maybe_op {
        let operand = Expression {
            id: ItemId::new(0),
            kind: operand,
            type_anno: None,
        };
        ExpressionKind::PrefixExp(op, Box::new(operand))
    } else {
        operand
    };
    Ok((text, kind))
}
// this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
/// Parses a chain of prefix expressions joined by binary operators, grouping
/// them according to `BinOp::precedence`.
///
/// The top-level call starts at `BinOp::min_precedence()`; failures are
/// reported under the "Precedence expression" context.
fn precedence_expr(text: &str) -> ParseResult<ExpressionKind> {
// Parses one operand and then absorbs every following `<binop> <operand>`
// pair whose operator binds strictly tighter than `precedence`.
fn inner_precedence_expr(input: &str, precedence: i32) -> ParseResult<ExpressionKind> {
// `outer_rest` is the input remaining after everything folded into `lhs` so far.
let (mut outer_rest, mut lhs) = prefix_expr(input)?;
loop {
// Look ahead past optional spaces for an operator. This works on `rest`, so
// nothing is committed to `outer_rest` until the operator is accepted.
let (rest, _) = space0(outer_rest)?;
let (rest, maybe_binop) = opt(parse_binop)(rest)?;
let (new_precedence, binop) = match maybe_binop {
Some(binop) => (binop.precedence(), binop),
None => break,
};
// An operator that does not bind tighter than the current level is left
// unconsumed for an enclosing call to handle. Because the comparison is `>=`,
// operators of equal precedence group to the left.
if precedence >= new_precedence {
break;
}
let (rest, _) = space0(rest)?;
// The right-hand side absorbs only operators binding tighter than this one.
let (rest, rhs) = inner_precedence_expr(rest, new_precedence)?;
// Commit: the operator and its right operand are now part of `lhs`.
outer_rest = rest;
lhs = ExpressionKind::BinExp(binop,
bx!(Expression::new(ItemId::new(0), lhs)),
bx!(Expression::new(ItemId::new(0), rhs))
);
}
Ok((outer_rest, lhs))
}
context("Precedence expression",
|input| inner_precedence_expr(input, BinOp::min_precedence())
)(text)
}
/// Parses a full expression and yields its `ExpressionKind`; currently this
/// delegates directly to `precedence_expr`.
fn expression_kind(text: &str) -> ParseResult<ExpressionKind> {
precedence_expr(text)
}
/// Experimental parser built on the `combine` crate.
///
/// It currently recognises only a comma-separated list of decimal natural
/// numbers; it exists to evaluate `combine` as an alternative to the nom-based
/// parser in the enclosing module.
mod thing {
    use combine::parser::combinator::from_str;
    use combine::parser::repeat::{many1, sep_by};
    use combine::Stream;
    use combine::*;

    /// Builds a parser for one decimal natural number: one or more ASCII digits,
    /// converted to a `u64`.
    ///
    /// This is a parser *constructor*: it takes no input itself, the returned
    /// parser is what gets fed the stream. The digit run is converted with
    /// `from_str`, which turns a failed conversion (e.g. overflow) into a parse
    /// error of the stream's own error type.
    fn number_literal<I>() -> impl Parser<I, Output = u64>
    where
        I: Stream<Token = char>,
        I::Error: ParseError<I::Token, I::Range, I::Position>,
    {
        use combine::parser::char::digit;
        from_str(many1::<String, _, _>(digit()))
    }

    /// Parses `input` as a comma-separated list of natural numbers and returns
    /// the debug rendering of the outcome: on success the parsed numbers plus
    /// the unconsumed remainder of the input, on failure the parse error.
    pub fn perform_parsing(input: &str) -> String {
        use combine::parser::char::char;
        // `sep_by` takes the element parser first and the separator second.
        // It also accepts zero elements, so empty input is not an error.
        let mut parser = sep_by(number_literal(), char(','));
        let result: Result<(Vec<u64>, &str), _> = parser.easy_parse(input);
        format!("{:?}", result)
    }
}
/// Parses `input` and returns a textual rendering of the outcome.
///
/// The work is delegated to the experimental `thing::perform_parsing`; the
/// nom-based `expression_kind` parser is not used here. This function always
/// returns `Ok`, because a parse failure is rendered into the returned string
/// rather than reported as an `Err`.
pub fn perform_parsing(input: &str) -> Result<String, String> {
    Ok(thing::perform_parsing(input))
}

View File

@@ -10,7 +10,7 @@ use schala_repl::{ProgrammingLanguageInterface,
ComputationRequest, ComputationResponse,
LangMetaRequest, LangMetaResponse, GlobalOutputStats,
DebugResponse, DebugAsk};
use crate::{ast, reduced_ast, tokenizing, parsing, eval, typechecking, symbol_table, source_map};
use crate::{ast, reduced_ast, tokenizing, parsing, parser, eval, typechecking, symbol_table, source_map};
pub type SymbolTableHandle = Rc<RefCell<symbol_table::SymbolTable>>;
pub type SourceMapHandle = Rc<RefCell<source_map::SourceMap>>;
@@ -195,7 +195,7 @@ fn eval(input: reduced_ast::ReducedAST, handle: &mut Schala, comp: Option<&mut P
.collect();
let eval_output: Result<String, String> = text_output
.map(|v| { Iterator::intersperse(v.into_iter(), "\n".to_owned()).collect() });
.map(|v| { v.into_iter().intersperse(format!("\n")).collect() });
eval_output
}
@@ -319,6 +319,8 @@ impl ProgrammingLanguageInterface for Schala {
total_duration, stage_durations
};
let main_output = parser::perform_parsing(source);
ComputationResponse {
main_output,
global_output_stats,

View File

@@ -123,7 +123,8 @@ type CharData = (usize, usize, char);
pub fn tokenize(input: &str) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
let mut input = input.lines().enumerate()
.intersperse((0, "\n"))
.flat_map(|(line_idx, ref line)| {
line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch))
})

View File

@@ -7,7 +7,7 @@ edition = "2018"
[dependencies]
llvm-sys = "70.0.2"
take_mut = "0.2.2"
itertools = "0.10"
itertools = "0.5.8"
getopts = "0.2.18"
lazy_static = "0.2.8"
maplit = "*"

View File

@@ -1,4 +1,5 @@
#![feature(box_patterns, box_syntax, proc_macro_hygiene, decl_macro)]
#![feature(link_args)]
#![feature(slice_patterns, box_patterns, box_syntax, proc_macro_hygiene, decl_macro)]
#![feature(plugin)]
extern crate getopts;
extern crate linefeed;

View File

@@ -1,6 +1,7 @@
use super::{Repl, InterpreterDirectiveOutput};
use crate::repl::help::help;
use crate::language::{LangMetaRequest, LangMetaResponse, DebugAsk, DebugResponse};
use itertools::Itertools;
use std::fmt::Write as FmtWrite;
#[derive(Debug, Clone)]