Compare commits
No commits in common. "94ee3e18970b46906682cd1e241d16ab7f6fafcb" and "8a9c63eccf45d1ff3c4950e45900fecfb7f05f90" have entirely different histories.
94ee3e1897
...
8a9c63eccf
25
Cargo.lock
generated
25
Cargo.lock
generated
@ -558,30 +558,6 @@ dependencies = [
|
|||||||
"winapi 0.3.8",
|
"winapi 0.3.8",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "peg"
|
|
||||||
version = "0.7.0"
|
|
||||||
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
|
|
||||||
dependencies = [
|
|
||||||
"peg-macros",
|
|
||||||
"peg-runtime",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "peg-macros"
|
|
||||||
version = "0.7.0"
|
|
||||||
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
|
|
||||||
dependencies = [
|
|
||||||
"peg-runtime",
|
|
||||||
"proc-macro2 1.0.30",
|
|
||||||
"quote 1.0.10",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "peg-runtime"
|
|
||||||
version = "0.7.0"
|
|
||||||
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "phf"
|
name = "phf"
|
||||||
version = "0.7.24"
|
version = "0.7.24"
|
||||||
@ -919,7 +895,6 @@ dependencies = [
|
|||||||
"ena",
|
"ena",
|
||||||
"failure",
|
"failure",
|
||||||
"itertools",
|
"itertools",
|
||||||
"peg",
|
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"radix_trie",
|
"radix_trie",
|
||||||
"schala-lang-codegen",
|
"schala-lang-codegen",
|
||||||
|
@ -34,6 +34,7 @@ impl Fold for RecursiveDescentFn {
|
|||||||
}
|
}
|
||||||
|
|
||||||
result.map_err(|mut parse_error: ParseError| {
|
result.map_err(|mut parse_error: ParseError| {
|
||||||
|
parse_error.production_name = Some(stringify!(#ident).to_string());
|
||||||
parse_error
|
parse_error
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -14,9 +14,6 @@ derivative = "1.0.3"
|
|||||||
colored = "1.8"
|
colored = "1.8"
|
||||||
radix_trie = "0.1.5"
|
radix_trie = "0.1.5"
|
||||||
assert_matches = "1.5"
|
assert_matches = "1.5"
|
||||||
#peg = "0.7.0"
|
|
||||||
peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" }
|
|
||||||
|
|
||||||
|
|
||||||
schala-lang-codegen = { path = "../codegen" }
|
schala-lang-codegen = { path = "../codegen" }
|
||||||
schala-repl = { path = "../../schala-repl" }
|
schala-repl = { path = "../../schala-repl" }
|
||||||
|
@ -14,8 +14,8 @@ fn getline(arg) { }
|
|||||||
|
|
||||||
fn map(input: Option<T>, func: Func): Option<T> {
|
fn map(input: Option<T>, func: Func): Option<T> {
|
||||||
if input {
|
if input {
|
||||||
is Option::Some(x) then Option::Some(func(x))
|
is Option::Some(x) then Option::Some(func(x)),
|
||||||
is Option::None then Option::None
|
is Option::None then Option::None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ pub use visitor::*;
|
|||||||
use crate::{
|
use crate::{
|
||||||
derivative::Derivative,
|
derivative::Derivative,
|
||||||
identifier::{define_id_kind, Id},
|
identifier::{define_id_kind, Id},
|
||||||
parsing::Location,
|
tokenizing::Location,
|
||||||
};
|
};
|
||||||
|
|
||||||
define_id_kind!(ASTItem);
|
define_id_kind!(ASTItem);
|
||||||
@ -197,7 +197,6 @@ pub struct TypeSingletonName {
|
|||||||
pub enum ExpressionKind {
|
pub enum ExpressionKind {
|
||||||
NatLiteral(u64),
|
NatLiteral(u64),
|
||||||
FloatLiteral(f64),
|
FloatLiteral(f64),
|
||||||
//TODO StringLiteral variant needs to support prefixes
|
|
||||||
StringLiteral(Rc<String>),
|
StringLiteral(Rc<String>),
|
||||||
BoolLiteral(bool),
|
BoolLiteral(bool),
|
||||||
BinExp(BinOp, Box<Expression>, Box<Expression>),
|
BinExp(BinOp, Box<Expression>, Box<Expression>),
|
||||||
@ -240,7 +239,7 @@ pub struct ConditionArm {
|
|||||||
pub enum Condition {
|
pub enum Condition {
|
||||||
Pattern(Pattern),
|
Pattern(Pattern),
|
||||||
TruncatedOp(BinOp, Expression),
|
TruncatedOp(BinOp, Expression),
|
||||||
//Expression(Expression), //I'm pretty sure I don't actually want this
|
Expression(Expression),
|
||||||
Else,
|
Else,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -263,7 +262,7 @@ pub enum PatternLiteral {
|
|||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone)]
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
pub struct Enumerator {
|
pub struct Enumerator {
|
||||||
pub id: Rc<String>, //TODO rename this field
|
pub id: Rc<String>,
|
||||||
pub generator: Expression,
|
pub generator: Expression,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
|
use crate::tokenizing::TokenKind;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone)]
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
pub struct PrefixOp {
|
pub struct PrefixOp {
|
||||||
sigil: Rc<String>,
|
sigil: Rc<String>,
|
||||||
@ -13,6 +15,10 @@ impl PrefixOp {
|
|||||||
pub fn sigil(&self) -> &str {
|
pub fn sigil(&self) -> &str {
|
||||||
&self.sigil
|
&self.sigil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_prefix(op: &str) -> bool {
|
||||||
|
matches!(op, "+" | "-" | "!")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone)]
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
@ -29,14 +35,34 @@ impl BinOp {
|
|||||||
&self.sigil
|
&self.sigil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn from_sigil_token(tok: &TokenKind) -> Option<BinOp> {
|
||||||
|
let s = token_kind_to_sigil(tok)?;
|
||||||
|
Some(BinOp::from_sigil(s))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn min_precedence() -> i32 {
|
pub fn min_precedence() -> i32 {
|
||||||
i32::min_value()
|
i32::min_value()
|
||||||
}
|
}
|
||||||
pub fn get_precedence(&self) -> i32 {
|
pub fn get_precedence_from_token(op_tok: &TokenKind) -> Option<i32> {
|
||||||
binop_precedences(self.sigil.as_ref())
|
let s = token_kind_to_sigil(op_tok)?;
|
||||||
|
Some(binop_precedences(s))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn token_kind_to_sigil(tok: &TokenKind) -> Option<&str> {
|
||||||
|
use self::TokenKind::*;
|
||||||
|
Some(match tok {
|
||||||
|
Operator(op) => op.as_str(),
|
||||||
|
Period => ".",
|
||||||
|
Pipe => "|",
|
||||||
|
Slash => "/",
|
||||||
|
LAngleBracket => "<",
|
||||||
|
RAngleBracket => ">",
|
||||||
|
Equals => "=",
|
||||||
|
_ => return None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn binop_precedences(s: &str) -> i32 {
|
fn binop_precedences(s: &str) -> i32 {
|
||||||
let default = 10_000_000;
|
let default = 10_000_000;
|
||||||
match s {
|
match s {
|
||||||
|
@ -166,6 +166,9 @@ pub fn walk_if_expr_body<V: ASTVisitor>(v: &mut V, body: &IfExpressionBody) {
|
|||||||
Condition::TruncatedOp(ref _binop, ref expr) => {
|
Condition::TruncatedOp(ref _binop, ref expr) => {
|
||||||
walk_expression(v, expr);
|
walk_expression(v, expr);
|
||||||
}
|
}
|
||||||
|
Condition::Expression(ref expr) => {
|
||||||
|
walk_expression(v, expr);
|
||||||
|
}
|
||||||
Condition::Else => (),
|
Condition::Else => (),
|
||||||
}
|
}
|
||||||
if let Some(ref guard) = arm.guard {
|
if let Some(ref guard) = arm.guard {
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
parsing::{Location, ParseError},
|
parsing::ParseError,
|
||||||
schala::{SourceReference, Stage},
|
schala::{SourceReference, Stage},
|
||||||
symbol_table::SymbolError,
|
symbol_table::SymbolError,
|
||||||
|
tokenizing::{Location, Token, TokenKind},
|
||||||
type_inference::TypeError,
|
type_inference::TypeError,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -51,6 +52,26 @@ impl SchalaError {
|
|||||||
errors: vec![],
|
errors: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn from_tokens(tokens: &[Token]) -> Option<SchalaError> {
|
||||||
|
let token_errors: Vec<Error> = tokens
|
||||||
|
.iter()
|
||||||
|
.filter_map(|tok| match tok.kind {
|
||||||
|
TokenKind::Error(ref err) => Some(Error {
|
||||||
|
location: Some(tok.location),
|
||||||
|
text: Some(err.clone()),
|
||||||
|
stage: Stage::Tokenizing,
|
||||||
|
}),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if token_errors.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(SchalaError { errors: token_errors, formatted_parse_error: None })
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
@ -61,18 +82,22 @@ struct Error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn format_parse_error(error: ParseError, source_reference: &SourceReference) -> String {
|
fn format_parse_error(error: ParseError, source_reference: &SourceReference) -> String {
|
||||||
let offset = error.location.offset;
|
let line_num = error.token.location.line_num;
|
||||||
let (line_start, line_num, line_from_program) = source_reference.get_line(offset);
|
let ch = error.token.location.char_num;
|
||||||
let ch = offset - line_start;
|
let line_from_program = source_reference.get_line(line_num as usize);
|
||||||
|
let location_pointer = format!("{}^", " ".repeat(ch.into()));
|
||||||
let location_pointer = format!("{}^", " ".repeat(ch));
|
|
||||||
|
|
||||||
let line_num_digits = format!("{}", line_num).chars().count();
|
let line_num_digits = format!("{}", line_num).chars().count();
|
||||||
let space_padding = " ".repeat(line_num_digits);
|
let space_padding = " ".repeat(line_num_digits);
|
||||||
|
|
||||||
|
let production = match error.production_name {
|
||||||
|
Some(n) => format!("\n(from production \"{}\")", n),
|
||||||
|
None => "".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
format!(
|
format!(
|
||||||
r#"
|
r#"
|
||||||
{error_msg}
|
{error_msg}{production}
|
||||||
{space_padding} |
|
{space_padding} |
|
||||||
{line_num} | {}
|
{line_num} | {}
|
||||||
{space_padding} | {}
|
{space_padding} | {}
|
||||||
@ -82,5 +107,6 @@ fn format_parse_error(error: ParseError, source_reference: &SourceReference) ->
|
|||||||
error_msg = error.msg,
|
error_msg = error.msg,
|
||||||
space_padding = space_padding,
|
space_padding = space_padding,
|
||||||
line_num = line_num,
|
line_num = line_num,
|
||||||
|
production = production
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
//! `ProgrammingLanguageInterface` and the chain of compiler passes for it.
|
//! `ProgrammingLanguageInterface` and the chain of compiler passes for it.
|
||||||
|
|
||||||
extern crate schala_repl;
|
extern crate schala_repl;
|
||||||
|
#[macro_use]
|
||||||
extern crate schala_lang_codegen;
|
extern crate schala_lang_codegen;
|
||||||
extern crate derivative;
|
extern crate derivative;
|
||||||
|
|
||||||
@ -18,6 +19,7 @@ mod type_inference;
|
|||||||
|
|
||||||
mod ast;
|
mod ast;
|
||||||
mod parsing;
|
mod parsing;
|
||||||
|
mod tokenizing;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
mod symbol_table;
|
mod symbol_table;
|
||||||
mod builtin;
|
mod builtin;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,592 +0,0 @@
|
|||||||
use std::rc::Rc;
|
|
||||||
|
|
||||||
//TODO make use of the format_parse_error function
|
|
||||||
//use crate::error::{SchalaError, format_parse_error};
|
|
||||||
use crate::{
|
|
||||||
ast::*,
|
|
||||||
identifier::{Id, IdStore},
|
|
||||||
parsing::ParseError,
|
|
||||||
};
|
|
||||||
|
|
||||||
fn rc_string(s: &str) -> Rc<String> {
|
|
||||||
Rc::new(s.to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Parser {
|
|
||||||
id_store: IdStore<ASTItem>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Parser {
|
|
||||||
pub(crate) fn new() -> Self {
|
|
||||||
Self { id_store: IdStore::new() }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn parse(&mut self, input: &str) -> Result<AST, ParseError> {
|
|
||||||
use peg::str::LineCol;
|
|
||||||
|
|
||||||
schala_parser::program(input, self).map_err(|err: peg::error::ParseError<LineCol>| {
|
|
||||||
let msg = err.to_string();
|
|
||||||
ParseError {
|
|
||||||
msg,
|
|
||||||
location: err.location.offset.into(),
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn fresh(&mut self) -> Id<ASTItem> {
|
|
||||||
self.id_store.fresh()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enum ExtendedPart<'a> {
|
|
||||||
Index(Vec<Expression>),
|
|
||||||
Accessor(&'a str),
|
|
||||||
Call(Vec<InvocationArgument>),
|
|
||||||
}
|
|
||||||
|
|
||||||
peg::parser! {
|
|
||||||
pub grammar schala_parser() for str {
|
|
||||||
|
|
||||||
rule whitespace() = [' ' | '\t' ]
|
|
||||||
rule whitespace_or_newline() = [' ' | '\t' | '\n' ]
|
|
||||||
|
|
||||||
rule _ = quiet!{ (block_comment() / line_comment() / whitespace())* }
|
|
||||||
|
|
||||||
rule __ = quiet!{ (block_comment() / line_comment() / whitespace_or_newline())* }
|
|
||||||
|
|
||||||
rule block_comment() = "/*" (block_comment() / !"*/" [_])* "*/"
|
|
||||||
rule line_comment() = "//" (!['\n'] [_])* &"\n"
|
|
||||||
|
|
||||||
|
|
||||||
pub rule program(parser: &mut Parser) -> AST =
|
|
||||||
__ statements:(statement(parser) ** (delimiter()+) ) __ { AST { id: parser.fresh(), statements: statements.into() } }
|
|
||||||
|
|
||||||
rule delimiter() = (";" / "\n")+
|
|
||||||
|
|
||||||
//Note - this is a hack, ideally the rule `rule block() -> Block = "{" _ items:(statement() **
|
|
||||||
//delimiter()) _ "}" { items.into() }` would've worked, but it doesn't.
|
|
||||||
pub rule block(parser: &mut Parser) -> Block =
|
|
||||||
"{" __ items:block_item(parser)* __ "}" { items.into() } /
|
|
||||||
"{" __ stmt:statement(parser) __ "}" { vec![stmt].into() }
|
|
||||||
|
|
||||||
rule block_item(parser: &mut Parser) -> Statement =
|
|
||||||
_ stmt:statement(parser) _ delimiter()+ { stmt }
|
|
||||||
|
|
||||||
rule statement(parser: &mut Parser) -> Statement =
|
|
||||||
_ pos:position!() kind:statement_kind(parser) _ { Statement { id: parser.fresh(), location: pos.into(), kind } }
|
|
||||||
|
|
||||||
rule statement_kind(parser: &mut Parser) -> StatementKind =
|
|
||||||
__ import:import(parser) { StatementKind::Import(import) } /
|
|
||||||
__ decl:declaration(parser) { StatementKind::Declaration(decl) } /
|
|
||||||
__ flow:flow(parser) { StatementKind::Flow(flow) } /
|
|
||||||
__ expr:expression(parser) { StatementKind::Expression(expr) }
|
|
||||||
|
|
||||||
rule flow(parser: &mut Parser) -> FlowControl =
|
|
||||||
"continue" { FlowControl::Continue } /
|
|
||||||
"break" { FlowControl::Break } /
|
|
||||||
"return" _ expr:expression(parser)? { FlowControl::Return(expr) }
|
|
||||||
|
|
||||||
rule import(parser: &mut Parser) -> ImportSpecifier =
|
|
||||||
"import" _ path_components:path_components() suffix:import_suffix()? {
|
|
||||||
ImportSpecifier {
|
|
||||||
id: parser.fresh(),
|
|
||||||
path_components,
|
|
||||||
imported_names: suffix.unwrap_or_else(|| ImportedNames::LastOfPath)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rule path_components() -> Vec<Rc<String>> =
|
|
||||||
"::"? name:identifier() rest:path_component()* {
|
|
||||||
let mut items = vec![rc_string(name)];
|
|
||||||
items.extend(rest.into_iter().map(|n| rc_string(n)));
|
|
||||||
items
|
|
||||||
}
|
|
||||||
|
|
||||||
rule path_component() -> &'input str = "::" ident:identifier() { ident }
|
|
||||||
|
|
||||||
rule import_suffix() -> ImportedNames =
|
|
||||||
"::*" { ImportedNames::All } /
|
|
||||||
"::{" __ names:(identifier() ** (_ "," _)) __ "}" { ImportedNames::List(names.into_iter().map(rc_string).collect()) }
|
|
||||||
|
|
||||||
|
|
||||||
rule declaration(parser: &mut Parser) -> Declaration =
|
|
||||||
binding(parser) / type_decl(parser) / annotation(parser) / func(parser) / interface(parser) /
|
|
||||||
implementation(parser) / module(parser)
|
|
||||||
|
|
||||||
rule module(parser: &mut Parser) -> Declaration =
|
|
||||||
"module" _ name:identifier() _ items:block(parser) { Declaration::Module { name: rc_string(name), items } }
|
|
||||||
|
|
||||||
rule implementation(parser: &mut Parser) -> Declaration =
|
|
||||||
"impl" _ interface:type_singleton_name() _ "for" _ type_name:type_identifier() _ block:decl_block(parser) {
|
|
||||||
Declaration::Impl { type_name, interface_name: Some(interface), block }
|
|
||||||
|
|
||||||
} /
|
|
||||||
"impl" _ type_name:type_identifier() _ block:decl_block(parser) {
|
|
||||||
Declaration::Impl { type_name, interface_name: None, block }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule decl_block(parser: &mut Parser) -> Vec<Declaration> =
|
|
||||||
"{" __ decls:(func_declaration(parser) ** (delimiter()+)) __ "}" { decls }
|
|
||||||
|
|
||||||
rule interface(parser: &mut Parser) -> Declaration =
|
|
||||||
"interface" _ name:identifier() _ signatures:signature_block(parser) { Declaration::Interface { name: rc_string(name), signatures } }
|
|
||||||
|
|
||||||
rule signature_block(parser: &mut Parser) -> Vec<Signature> =
|
|
||||||
"{" __ signatures:(func_signature(parser) ** (delimiter()+)) __ "}" { signatures }
|
|
||||||
|
|
||||||
rule func(parser: &mut Parser) -> Declaration =
|
|
||||||
decl:func_declaration(parser) { decl } /
|
|
||||||
sig:func_signature(parser) { Declaration::FuncSig(sig) }
|
|
||||||
|
|
||||||
rule func_declaration(parser: &mut Parser) -> Declaration =
|
|
||||||
_ sig:func_signature(parser) __ body:block(parser) { Declaration::FuncDecl(sig, body) }
|
|
||||||
|
|
||||||
//TODO handle operators
|
|
||||||
rule func_signature(parser: &mut Parser) -> Signature =
|
|
||||||
_ "fn" _ name:identifier() "(" _ params:formal_params(parser) _ ")" _ type_anno:type_anno()? { Signature {
|
|
||||||
name: rc_string(name), operator: false, params, type_anno
|
|
||||||
} }
|
|
||||||
|
|
||||||
rule formal_params(parser: &mut Parser) -> Vec<FormalParam> =
|
|
||||||
params:(formal_param(parser) ** (_ "," _)) {? if params.len() < 256 { Ok(params) } else {
|
|
||||||
Err("function-too-long") }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule formal_param(parser: &mut Parser) -> FormalParam =
|
|
||||||
name:identifier() _ anno:type_anno()? _ "=" expr:expression(parser) { FormalParam { name: rc_string(name),
|
|
||||||
default: Some(expr), anno } } /
|
|
||||||
name:identifier() _ anno:type_anno()? { FormalParam { name: rc_string(name), default: None, anno } }
|
|
||||||
|
|
||||||
|
|
||||||
rule annotation(parser: &mut Parser) -> Declaration =
|
|
||||||
"@" name:identifier() args:annotation_args(parser)? delimiter()+ _ inner:statement(parser) { Declaration::Annotation {
|
|
||||||
name: rc_string(name), arguments: if let Some(args) = args { args } else { vec![] }, inner: Box::new(inner) }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule annotation_args(parser: &mut Parser) -> Vec<Expression> =
|
|
||||||
"(" _ args:(expression(parser) ** (_ "," _)) _ ")" { args }
|
|
||||||
|
|
||||||
|
|
||||||
rule binding(parser: &mut Parser) -> Declaration =
|
|
||||||
"let" _ mutable:"mut"? _ ident:identifier() _ type_anno:type_anno()? _ "=" _ expr:expression(parser) {
|
|
||||||
Declaration::Binding { name: Rc::new(ident.to_string()), constant: mutable.is_none(),
|
|
||||||
type_anno, expr }
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
rule type_decl(parser: &mut Parser) -> Declaration =
|
|
||||||
"type" _ "alias" _ alias:type_alias() { alias } /
|
|
||||||
"type" _ mutable:"mut"? _ name:type_singleton_name() _ "=" _ body:type_body(parser) {
|
|
||||||
Declaration::TypeDecl { name, body, mutable: mutable.is_some() }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule type_singleton_name() -> TypeSingletonName =
|
|
||||||
name:identifier() params:type_params()? { TypeSingletonName {
|
|
||||||
name: rc_string(name), params: if let Some(params) = params { params } else { vec![] }
|
|
||||||
} }
|
|
||||||
|
|
||||||
rule type_params() -> Vec<TypeIdentifier> =
|
|
||||||
"<" _ idents:(type_identifier() ** (_ "," _)) _ ">" { idents }
|
|
||||||
|
|
||||||
rule type_identifier() -> TypeIdentifier =
|
|
||||||
"(" _ items:(type_identifier() ** (_ "," _)) _ ")" { TypeIdentifier::Tuple(items) } /
|
|
||||||
singleton:type_singleton_name() { TypeIdentifier::Singleton(singleton) }
|
|
||||||
|
|
||||||
rule type_body(parser: &mut Parser) -> TypeBody =
|
|
||||||
"{" _ items:(record_variant_item() ** (__ "," __)) __ "}" { TypeBody::ImmediateRecord(parser.fresh(), items) } /
|
|
||||||
variants:(variant_spec(parser) ** (__ "|" __)) { TypeBody::Variants(variants) }
|
|
||||||
|
|
||||||
rule variant_spec(parser: &mut Parser) -> Variant =
|
|
||||||
name:identifier() __ "{" __ typed_identifier_list:(record_variant_item() ** (__ "," __)) __ ","? __ "}" { Variant {
|
|
||||||
id: parser.fresh(), name: rc_string(name), kind: VariantKind::Record(typed_identifier_list)
|
|
||||||
} } /
|
|
||||||
name:identifier() "(" tuple_members:(type_identifier() ++ (__ "," __)) ")" { Variant {
|
|
||||||
id: parser.fresh(), name: rc_string(name), kind: VariantKind::TupleStruct(tuple_members) } } /
|
|
||||||
name:identifier() { Variant { id: parser.fresh(), name: rc_string(name), kind: VariantKind::UnitStruct } }
|
|
||||||
|
|
||||||
rule record_variant_item() -> (Rc<String>, TypeIdentifier) =
|
|
||||||
name:identifier() _ ":" _ ty:type_identifier() { (rc_string(name), ty) }
|
|
||||||
|
|
||||||
rule type_alias() -> Declaration =
|
|
||||||
alias:identifier() _ "=" _ name:identifier() { Declaration::TypeAlias { alias: rc_string(alias), original: rc_string(name), } }
|
|
||||||
|
|
||||||
rule type_anno() -> TypeIdentifier =
|
|
||||||
":" _ identifier:type_identifier() { identifier }
|
|
||||||
|
|
||||||
pub rule expression(parser: &mut Parser) -> Expression =
|
|
||||||
__ kind:expression_kind(true, parser) _ type_anno:type_anno()? { Expression { id: parser.fresh(), type_anno, kind } }
|
|
||||||
|
|
||||||
rule expression_no_struct(parser: &mut Parser) -> Expression =
|
|
||||||
__ kind:expression_kind(false, parser) { Expression { id: parser.fresh(), type_anno: None, kind: kind } }
|
|
||||||
|
|
||||||
rule expression_kind(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
|
||||||
precedence_expr(struct_ok, parser)
|
|
||||||
|
|
||||||
rule precedence_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
|
||||||
first:prefix_expr(struct_ok, parser) _ next:(precedence_continuation(struct_ok, parser))* {
|
|
||||||
let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect();
|
|
||||||
BinopSequence { first, next }.do_precedence(parser)
|
|
||||||
}
|
|
||||||
|
|
||||||
rule precedence_continuation(struct_ok: bool, parser: &mut Parser) -> (&'input str, ExpressionKind) =
|
|
||||||
op:operator() _ expr:prefix_expr(struct_ok, parser) _ { (op, expr) }
|
|
||||||
|
|
||||||
rule prefix_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
|
||||||
prefix:prefix()? expr:extended_expr(struct_ok, parser) {
|
|
||||||
if let Some(p) = prefix {
|
|
||||||
let expr = Expression::new(parser.fresh(), expr);
|
|
||||||
let prefix = PrefixOp::from_sigil(p);
|
|
||||||
ExpressionKind::PrefixExp(prefix, Box::new(expr))
|
|
||||||
} else {
|
|
||||||
expr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
rule prefix() -> &'input str =
|
|
||||||
$(['+' | '-' | '!' ])
|
|
||||||
|
|
||||||
//TODO make the definition of operators more complex
|
|
||||||
rule operator() -> &'input str =
|
|
||||||
quiet!{!"*/" s:$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ ) { s } } /
|
|
||||||
expected!("operator")
|
|
||||||
|
|
||||||
rule extended_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
|
||||||
primary:primary(struct_ok, parser) parts:(extended_expr_part(parser)*) {
|
|
||||||
let mut expression = Expression::new(parser.fresh(), primary);
|
|
||||||
for part in parts.into_iter() {
|
|
||||||
let kind = match part {
|
|
||||||
ExtendedPart::Index(indexers) => {
|
|
||||||
ExpressionKind::Index { indexee: Box::new(expression), indexers }
|
|
||||||
},
|
|
||||||
ExtendedPart::Accessor(name) => {
|
|
||||||
let name = rc_string(name);
|
|
||||||
ExpressionKind::Access { name, expr: Box::new(expression) }
|
|
||||||
},
|
|
||||||
ExtendedPart::Call(arguments) => {
|
|
||||||
ExpressionKind::Call { f: Box::new(expression), arguments }
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
expression = Expression::new(parser.fresh(), kind);
|
|
||||||
}
|
|
||||||
|
|
||||||
expression.kind
|
|
||||||
}
|
|
||||||
|
|
||||||
rule extended_expr_part(parser: &mut Parser) -> ExtendedPart<'input> =
|
|
||||||
indexers:index_part(parser) { ExtendedPart::Index(indexers) } /
|
|
||||||
arguments:call_part(parser) { ExtendedPart::Call(arguments) } /
|
|
||||||
"." name:identifier() { ExtendedPart::Accessor(name) }
|
|
||||||
|
|
||||||
rule index_part(parser: &mut Parser) -> Vec<Expression> =
|
|
||||||
"[" indexers:(expression(parser) ++ ",") "]" { indexers }
|
|
||||||
|
|
||||||
rule call_part(parser: &mut Parser) -> Vec<InvocationArgument> =
|
|
||||||
"(" arguments:(invocation_argument(parser) ** ",") ")" { arguments }
|
|
||||||
|
|
||||||
//TODO this shouldn't be an expression b/c type annotations disallowed here
|
|
||||||
rule invocation_argument(parser: &mut Parser) -> InvocationArgument =
|
|
||||||
_ "_" _ { InvocationArgument::Ignored } /
|
|
||||||
_ ident:identifier() _ "=" _ expr:expression(parser) { InvocationArgument::Keyword {
|
|
||||||
name: Rc::new(ident.to_string()),
|
|
||||||
expr
|
|
||||||
} } /
|
|
||||||
_ expr:expression(parser) _ { InvocationArgument::Positional(expr) }
|
|
||||||
|
|
||||||
|
|
||||||
rule primary(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
|
||||||
while_expr(parser) / for_expr(parser) / float_literal() / nat_literal() / bool_literal() /
|
|
||||||
string_literal() / paren_expr(parser) /
|
|
||||||
list_expr(parser) / if_expr(parser) / lambda_expr(parser) /
|
|
||||||
item:named_struct(parser) {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } /
|
|
||||||
identifier_expr(parser)
|
|
||||||
|
|
||||||
rule lambda_expr(parser: &mut Parser) -> ExpressionKind =
|
|
||||||
r#"\"# __ "(" _ params:formal_params(parser) _ ")" _ type_anno:(type_anno()?) _ body:block(parser) {
|
|
||||||
ExpressionKind::Lambda { params, type_anno, body }
|
|
||||||
} /
|
|
||||||
r#"\"# param:formal_param(parser) _ type_anno:(type_anno()?) _ body:block(parser) {
|
|
||||||
ExpressionKind::Lambda { params: vec![param], type_anno, body }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule for_expr(parser: &mut Parser) -> ExpressionKind =
|
|
||||||
"for" _ enumerators:for_enumerators(parser) _ body:for_body(parser) {
|
|
||||||
ExpressionKind::ForExpression { enumerators, body }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule for_enumerators(parser: &mut Parser) -> Vec<Enumerator> =
|
|
||||||
"{" _ enumerators:(enumerator(parser) ++ ",") _ "}" { enumerators } /
|
|
||||||
enumerator:enumerator(parser) { vec![enumerator] }
|
|
||||||
|
|
||||||
//TODO add guards, etc.
|
|
||||||
rule enumerator(parser: &mut Parser) -> Enumerator =
|
|
||||||
ident:identifier() _ "<-" _ generator:expression_no_struct(parser) {
|
|
||||||
Enumerator { id: Rc::new(ident.to_string()), generator }
|
|
||||||
} /
|
|
||||||
//TODO need to distinguish these two cases in AST
|
|
||||||
ident:identifier() _ "=" _ generator:expression_no_struct(parser) {
|
|
||||||
Enumerator { id: Rc::new(ident.to_string()), generator }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule for_body(parser: &mut Parser) -> Box<ForBody> =
|
|
||||||
"return" _ expr:expression(parser) { Box::new(ForBody::MonadicReturn(expr)) } /
|
|
||||||
body:block(parser) { Box::new(ForBody::StatementBlock(body)) }
|
|
||||||
|
|
||||||
rule while_expr(parser: &mut Parser) -> ExpressionKind =
|
|
||||||
"while" _ cond:expression_kind(false, parser)? _ body:block(parser) {
|
|
||||||
ExpressionKind::WhileExpression {
|
|
||||||
condition: cond.map(|kind| Box::new(Expression::new(parser.fresh(), kind))),
|
|
||||||
body,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
rule identifier_expr(parser: &mut Parser) -> ExpressionKind =
|
|
||||||
qn:qualified_identifier(parser) { ExpressionKind::Value(qn) }
|
|
||||||
|
|
||||||
rule named_struct(parser: &mut Parser) -> ExpressionKind =
|
|
||||||
name:qualified_identifier(parser) _ fields:record_block(parser) {
|
|
||||||
ExpressionKind::NamedStruct {
|
|
||||||
name,
|
|
||||||
fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//TODO anonymous structs, update syntax for structs
|
|
||||||
rule record_block(parser: &mut Parser) -> Vec<(&'input str, Expression)> =
|
|
||||||
"{" _ entries:(record_entry(parser) ** ",") _ "}" { entries }
|
|
||||||
|
|
||||||
rule record_entry(parser: &mut Parser) -> (&'input str, Expression) =
|
|
||||||
_ name:identifier() _ ":" _ expr:expression(parser) _ { (name, expr) }
|
|
||||||
|
|
||||||
rule qualified_identifier(parser: &mut Parser) -> QualifiedName =
|
|
||||||
names:(identifier() ++ "::") { QualifiedName { id: parser.fresh(), components: names.into_iter().map(|name| Rc::new(name.to_string())).collect() } }
|
|
||||||
|
|
||||||
//TODO improve the definition of identifiers
|
|
||||||
rule identifier() -> &'input str =
|
|
||||||
!(reserved() !(ident_continuation())) text:$(['a'..='z' | 'A'..='Z' | '_'] ident_continuation()*) { text }
|
|
||||||
|
|
||||||
rule ident_continuation() -> &'input str =
|
|
||||||
text:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_'])
|
|
||||||
|
|
||||||
rule reserved() = "if" / "then" / "else" / "is" / "fn" / "for" / "while" / "let" / "in" / "mut" / "return" /
|
|
||||||
"break" / "alias" / "type" / "self" / "Self" / "interface" / "impl" / "true" / "false" / "module" / "import"
|
|
||||||
|
|
||||||
|
|
||||||
rule if_expr(parser: &mut Parser) -> ExpressionKind =
|
|
||||||
"if" _ discriminator:(expression(parser)?) _ body:if_expr_body(parser) {
|
|
||||||
ExpressionKind::IfExpression {
|
|
||||||
discriminator: discriminator.map(Box::new),
|
|
||||||
body: Box::new(body),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rule if_expr_body(parser: &mut Parser) -> IfExpressionBody =
|
|
||||||
cond_block(parser) / simple_pattern_match(parser) / simple_conditional(parser)
|
|
||||||
|
|
||||||
rule simple_conditional(parser: &mut Parser) -> IfExpressionBody =
|
|
||||||
"then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) {
|
|
||||||
IfExpressionBody::SimpleConditional { then_case, else_case }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule simple_pattern_match(parser: &mut Parser) -> IfExpressionBody =
|
|
||||||
"is" _ pattern:pattern(parser) _ "then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) {
|
|
||||||
IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }
|
|
||||||
}
|
|
||||||
|
|
||||||
rule cond_block(parser: &mut Parser) -> IfExpressionBody =
|
|
||||||
"{" __ cond_arms:(cond_arm(parser) ++ (delimiter()+)) __ "}" { IfExpressionBody::CondList(cond_arms) }
|
|
||||||
|
|
||||||
rule cond_arm(parser: &mut Parser) -> ConditionArm =
|
|
||||||
_ "else" _ body:expr_or_block(parser) { ConditionArm { condition: Condition::Else, guard: None, body } } /
|
|
||||||
_ condition:condition(parser) _ guard:condition_guard(parser) _ "then" _ body:expr_or_block(parser)
|
|
||||||
{ ConditionArm { condition, guard, body } }
|
|
||||||
|
|
||||||
rule condition(parser: &mut Parser) -> Condition =
|
|
||||||
"is" _ pat:pattern(parser) { Condition::Pattern(pat) } /
|
|
||||||
op:operator() _ expr:expression(parser) { Condition::TruncatedOp(BinOp::from_sigil(op), expr) }
|
|
||||||
|
|
||||||
rule condition_guard(parser: &mut Parser) -> Option<Expression> =
|
|
||||||
("if" _ expr:expression(parser) { expr } )?
|
|
||||||
|
|
||||||
rule expr_or_block(parser: &mut Parser) -> Block = block(parser) / pos:position!() ex:expression(parser) {
|
|
||||||
Statement {
|
|
||||||
id: parser.fresh() , location: pos.into(),
|
|
||||||
kind: StatementKind::Expression(ex)
|
|
||||||
}.into()
|
|
||||||
}
|
|
||||||
|
|
||||||
rule else_case(parser: &mut Parser) -> Option<Block> =
|
|
||||||
("else" _ eorb:expr_or_block(parser) { eorb })?
|
|
||||||
|
|
||||||
rule pattern(parser: &mut Parser) -> Pattern =
|
|
||||||
"(" _ variants:(pattern(parser) ++ ",") _ ")" { Pattern::TuplePattern(variants) } /
|
|
||||||
_ pat:simple_pattern(parser) { pat }
|
|
||||||
|
|
||||||
rule simple_pattern(parser: &mut Parser) -> Pattern =
|
|
||||||
pattern_literal() /
|
|
||||||
qn:qualified_identifier(parser) "(" members:(pattern(parser) ** ",") ")" {
|
|
||||||
Pattern::TupleStruct(qn, members)
|
|
||||||
} /
|
|
||||||
qn:qualified_identifier(parser) _ "{" _ items:(record_pattern_entry(parser) ** ",") "}" _ {
|
|
||||||
let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect();
|
|
||||||
Pattern::Record(qn, items)
|
|
||||||
} /
|
|
||||||
qn:qualified_identifier(parser) { Pattern::VarOrName(qn) }
|
|
||||||
|
|
||||||
rule record_pattern_entry(parser: &mut Parser) -> (&'input str, Pattern) =
|
|
||||||
_ name:identifier() _ ":" _ pat:pattern(parser) _ { (name, pat) } /
|
|
||||||
_ name:identifier() _ {
|
|
||||||
let qn = QualifiedName {
|
|
||||||
id: parser.fresh(),
|
|
||||||
components: vec![Rc::new(name.to_string())],
|
|
||||||
};
|
|
||||||
(name, Pattern::VarOrName(qn))
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
rule pattern_literal() -> Pattern =
|
|
||||||
"true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } /
|
|
||||||
"false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } /
|
|
||||||
s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string()))) } /
|
|
||||||
sign:("-"?) num:(float_literal() / nat_literal()) {
|
|
||||||
let neg = sign.is_some();
|
|
||||||
Pattern::Literal(PatternLiteral::NumPattern { neg, num })
|
|
||||||
} /
|
|
||||||
"_" { Pattern::Ignored }
|
|
||||||
|
|
||||||
|
|
||||||
// A list literal: zero or more comma-separated expressions between square brackets.
// The parsed element vector is handed to the AST node as-is.
rule list_expr(parser: &mut Parser) -> ExpressionKind =
    "[" exprs:(expression(parser) ** ",") "]" { ExpressionKind::ListLiteral(exprs) }
|
|
||||||
|
|
||||||
rule paren_expr(parser: &mut Parser) -> ExpressionKind =
|
|
||||||
"(" exprs:(expression(parser) ** ",") ")" {
|
|
||||||
let mut exprs = exprs;
|
|
||||||
match exprs.len() {
|
|
||||||
1 => exprs.pop().unwrap().kind,
|
|
||||||
_ => ExpressionKind::TupleLiteral(exprs),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO need to do something with prefix in the AST
|
|
||||||
rule string_literal() -> ExpressionKind =
|
|
||||||
prefix:identifier()? s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) }
|
|
||||||
|
|
||||||
rule bare_string_literal() -> &'input str =
|
|
||||||
"\"" s:$(string_component()*) "\"" { s }
|
|
||||||
|
|
||||||
rule string_component() -> &'input str =
|
|
||||||
r#"\\"# { "\\" } /
|
|
||||||
r#"\""# { "\"" } /
|
|
||||||
r#"\t"# { "\t" } /
|
|
||||||
r#"\n"# { "\n" } /
|
|
||||||
ch:$([^ '"' ]) { ch }
|
|
||||||
|
|
||||||
rule bool_literal() -> ExpressionKind =
|
|
||||||
"true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) }
|
|
||||||
|
|
||||||
rule nat_literal() -> ExpressionKind =
|
|
||||||
bin_literal() / hex_literal() / unmarked_literal()
|
|
||||||
|
|
||||||
// A decimal natural-number literal with no radix prefix.
// `digits()` permits `_` separators, which `str::parse` does not understand, so
// they are stripped first. A literal that still cannot be represented (e.g. it
// is too large) is reported as a parse failure rather than panicking.
rule unmarked_literal() -> ExpressionKind =
    digits:digits() {?
        digits
            .replace('_', "")
            .parse()
            .map(ExpressionKind::NatLiteral)
            .map_err(|_| "natural number literal that fits in 64 bits")
    }
|
|
||||||
|
|
||||||
rule bin_literal() -> ExpressionKind =
|
|
||||||
"0b" digits:bin_digits() {? parse_binary(digits).map(ExpressionKind::NatLiteral) }
|
|
||||||
|
|
||||||
rule hex_literal() -> ExpressionKind =
|
|
||||||
"0x" digits:hex_digits() {? parse_hex(digits).map(ExpressionKind::NatLiteral) }
|
|
||||||
|
|
||||||
// A floating-point literal: `digits "." [digits]` or `"." digits`.
// The matched slice may contain `_` separators (allowed by `digits()`), which
// `str::parse` rejects, so they are removed before conversion. A conversion
// failure becomes a parse failure instead of a panic.
rule float_literal() -> ExpressionKind =
    ds:$( digits() "." digits()? / "." digits() ) {?
        ds.replace('_', "")
            .parse()
            .map(ExpressionKind::FloatLiteral)
            .map_err(|_| "valid floating point literal")
    }
|
|
||||||
|
|
||||||
rule digits() -> &'input str = $((digit_group() "_"*)+)
|
|
||||||
rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+)
|
|
||||||
rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+)
|
|
||||||
|
|
||||||
rule digit_group() -> &'input str = $(['0'..='9']+)
|
|
||||||
rule bin_digit_group() -> &'input str = $(['0' | '1']+)
|
|
||||||
rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+)
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Converts the digits of a binary literal (without the `0b` prefix) into a `u64`.
///
/// `_` separators are skipped. Digits are consumed from least to most
/// significant, tracking the bit position of each one.
///
/// # Errors
///
/// Returns `Err` if a `1` digit falls at bit position 64 or above (the value does
/// not fit in a `u64`), or if a character other than `0`, `1` or `_` is present.
/// Leading zeros never cause an overflow error.
fn parse_binary(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    // Bit position of the digit currently being processed.
    let mut shift: u32 = 0;
    for d in digits.chars().rev() {
        match d {
            '_' => continue,
            '0' => (),
            // `checked_shl` yields `None` only once the position is outside the
            // 64-bit range, so a full-width 64-digit literal is still accepted.
            '1' => result |= 1u64.checked_shl(shift).ok_or("Binary expression will overflow")?,
            _ => return Err("Internal parser error: invalid binary digit"),
        }
        shift = shift.saturating_add(1);
    }
    Ok(result)
}
|
|
||||||
|
|
||||||
/// Converts the digits of a hexadecimal literal (without the `0x` prefix) into a `u64`.
///
/// `_` separators are skipped. Digits are consumed from least to most
/// significant; each one occupies four bits of the result.
///
/// # Errors
///
/// Returns `Err` if a non-zero digit falls at bit position 64 or above (the value
/// does not fit in a `u64`), or if a character is not a hexadecimal digit.
/// Leading zeros never cause an overflow error.
fn parse_hex(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    // Bit offset of the digit currently being processed (a multiple of 4).
    let mut shift: u32 = 0;
    for d in digits.chars().rev() {
        if d == '_' {
            continue;
        }
        let n = d.to_digit(16).ok_or("Internal parser error: invalid hex digit")?;
        if n != 0 {
            // The offset is a multiple of 4 and `n < 16`, so any offset below 64
            // keeps every bit; `checked_shl` rejects offsets of 64 and above.
            result |= u64::from(n).checked_shl(shift).ok_or("Hexadecimal expression will overflow")?;
        }
        shift = shift.saturating_add(4);
    }
    Ok(result)
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct BinopSequence {
|
|
||||||
first: ExpressionKind,
|
|
||||||
next: Vec<(BinOp, ExpressionKind)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BinopSequence {
|
|
||||||
fn do_precedence(self, parser: &mut Parser) -> ExpressionKind {
|
|
||||||
fn helper(
|
|
||||||
precedence: i32,
|
|
||||||
lhs: ExpressionKind,
|
|
||||||
rest: &mut Vec<(BinOp, ExpressionKind)>,
|
|
||||||
parser: &mut Parser,
|
|
||||||
) -> Expression {
|
|
||||||
let mut lhs = Expression::new(parser.fresh(), lhs);
|
|
||||||
loop {
|
|
||||||
let (next_op, next_rhs) = match rest.pop() {
|
|
||||||
Some((a, b)) => (a, b),
|
|
||||||
None => break,
|
|
||||||
};
|
|
||||||
let new_precedence = next_op.get_precedence();
|
|
||||||
if precedence >= new_precedence {
|
|
||||||
rest.push((next_op, next_rhs));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
let rhs = helper(new_precedence, next_rhs, rest, parser);
|
|
||||||
lhs = Expression::new(
|
|
||||||
parser.fresh(),
|
|
||||||
ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
lhs
|
|
||||||
}
|
|
||||||
let mut as_stack = self.next.into_iter().rev().collect();
|
|
||||||
helper(BinOp::min_precedence(), self.first, &mut as_stack, parser).kind
|
|
||||||
}
|
|
||||||
}
|
|
@ -6,8 +6,8 @@ use std::{fmt::Write, rc::Rc};
|
|||||||
|
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
use super::new::{schala_parser, Parser};
|
use super::{tokenize, ParseResult, Parser};
|
||||||
use crate::{ast::*, parsing::Location};
|
use crate::{ast::*, tokenizing::Location};
|
||||||
|
|
||||||
fn rc(s: &str) -> Rc<String> {
|
fn rc(s: &str) -> Rc<String> {
|
||||||
Rc::new(s.to_owned())
|
Rc::new(s.to_owned())
|
||||||
@ -17,6 +17,18 @@ fn bx<T>(item: T) -> Box<T> {
|
|||||||
Box::new(item)
|
Box::new(item)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn make_parser(input: &str) -> Parser {
|
||||||
|
let tokens: Vec<crate::tokenizing::Token> = tokenize(input);
|
||||||
|
let mut parser = super::Parser::new();
|
||||||
|
parser.add_new_tokens(tokens);
|
||||||
|
parser
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse(input: &str) -> ParseResult<AST> {
|
||||||
|
let mut parser = make_parser(input);
|
||||||
|
parser.parse()
|
||||||
|
}
|
||||||
|
|
||||||
fn stmt(kind: StatementKind) -> Statement {
|
fn stmt(kind: StatementKind) -> Statement {
|
||||||
Statement { location: Location::default(), id: ItemId::default(), kind }
|
Statement { location: Location::default(), id: ItemId::default(), kind }
|
||||||
}
|
}
|
||||||
@ -87,43 +99,33 @@ fn ty_simple(name: &str) -> TypeIdentifier {
|
|||||||
|
|
||||||
macro_rules! assert_ast {
|
macro_rules! assert_ast {
|
||||||
($input:expr, $statements:expr) => {
|
($input:expr, $statements:expr) => {
|
||||||
let mut parser = Parser::new();
|
let ast = parse($input).unwrap();
|
||||||
let ast = schala_parser::program($input, &mut parser);
|
|
||||||
let expected = AST { id: Default::default(), statements: $statements.into() };
|
let expected = AST { id: Default::default(), statements: $statements.into() };
|
||||||
if ast.is_err() {
|
println!("Expected: {}", expected);
|
||||||
println!("Parse error: {}", ast.unwrap_err());
|
println!("Actual: {}", ast);
|
||||||
panic!();
|
assert_eq!(ast, expected);
|
||||||
}
|
|
||||||
assert_eq!(ast.unwrap(), expected);
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! assert_fail {
|
macro_rules! assert_fail {
|
||||||
($input:expr, $failure:expr) => {
|
($input:expr, $failure:expr) => {
|
||||||
let mut parser = Parser::new();
|
let err = parse($input).unwrap_err();
|
||||||
let err = schala_parser::program($input, &mut parser).unwrap_err();
|
assert_eq!(err.msg, $failure);
|
||||||
assert_eq!(err.to_string(), $failure);
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! assert_expr {
|
macro_rules! assert_expr {
|
||||||
($input:expr, $correct:expr) => {
|
($input:expr, $correct:expr) => {
|
||||||
let mut parser = Parser::new();
|
let mut parser = make_parser($input);
|
||||||
let expr = schala_parser::expression($input, &mut parser);
|
assert_eq!(parser.expression().unwrap(), $correct);
|
||||||
if expr.is_err() {
|
|
||||||
println!("Expression parse error: {}", expr.unwrap_err());
|
|
||||||
panic!();
|
|
||||||
}
|
|
||||||
assert_eq!(expr.unwrap(), $correct);
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! assert_fail_expr {
|
macro_rules! assert_fail_expr {
|
||||||
($input:expr, $failure:expr) => {
|
($input:expr, $failure:expr) => {
|
||||||
let mut parser = Parser::new();
|
let mut parser = make_parser($input);
|
||||||
let _err = schala_parser::expression($input, &mut parser).unwrap_err();
|
let err = parser.expression().unwrap_err();
|
||||||
//TODO make real tests for failures
|
assert_eq!(err.msg, $failure);
|
||||||
//assert_eq!(err.to_string(), $failure);
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
@ -139,17 +141,7 @@ fn basic_literals() {
|
|||||||
assert_expr!("0xf_f_", expr(NatLiteral(255)));
|
assert_expr!("0xf_f_", expr(NatLiteral(255)));
|
||||||
assert_expr!("false", expr(BoolLiteral(false)));
|
assert_expr!("false", expr(BoolLiteral(false)));
|
||||||
assert_expr!("true", expr(BoolLiteral(true)));
|
assert_expr!("true", expr(BoolLiteral(true)));
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn string_literals() {
|
|
||||||
use ExpressionKind::*;
|
|
||||||
|
|
||||||
assert_expr!(r#""""#, expr(StringLiteral(rc(""))));
|
|
||||||
assert_expr!(r#""hello""#, expr(StringLiteral(rc("hello"))));
|
assert_expr!(r#""hello""#, expr(StringLiteral(rc("hello"))));
|
||||||
assert_expr!(r#"b"some bytestring""#, expr(StringLiteral(rc("some bytestring"))));
|
|
||||||
//NOTE I'm not 100% sure this case is correct, but I'll deal with it later
|
|
||||||
assert_expr!(r#""Do \n \" escapes work\t""#, expr(StringLiteral(rc(r#"Do \n \" escapes work\t"#))));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -158,7 +150,6 @@ fn list_literals() {
|
|||||||
|
|
||||||
assert_expr!("[]", expr(ListLiteral(vec![])));
|
assert_expr!("[]", expr(ListLiteral(vec![])));
|
||||||
assert_expr!("[1,2]", expr(ListLiteral(vec![expr(NatLiteral(1)), expr(NatLiteral(2)),])));
|
assert_expr!("[1,2]", expr(ListLiteral(vec![expr(NatLiteral(1)), expr(NatLiteral(2)),])));
|
||||||
assert_fail_expr!("[1,,2]", "some failure");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -167,13 +158,17 @@ fn binexps() {
|
|||||||
use StatementKind::Expression;
|
use StatementKind::Expression;
|
||||||
|
|
||||||
assert_expr!("0xf_f_+1", binop("+", expr(NatLiteral(255)), expr(NatLiteral(1))));
|
assert_expr!("0xf_f_+1", binop("+", expr(NatLiteral(255)), expr(NatLiteral(1))));
|
||||||
assert_ast!(
|
assert_eq!(
|
||||||
"3; 4; 4.3",
|
parse("3; 4; 4.3").unwrap(),
|
||||||
vec![
|
AST {
|
||||||
stmt(Expression(expr(NatLiteral(3)))),
|
id: Default::default(),
|
||||||
stmt(Expression(expr(NatLiteral(4)))),
|
statements: vec![
|
||||||
stmt(Expression(expr(FloatLiteral(4.3)))),
|
stmt(Expression(expr(NatLiteral(3)))),
|
||||||
]
|
stmt(Expression(expr(NatLiteral(4)))),
|
||||||
|
stmt(Expression(expr(FloatLiteral(4.3)))),
|
||||||
|
]
|
||||||
|
.into()
|
||||||
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_expr!(
|
assert_expr!(
|
||||||
@ -312,22 +307,12 @@ fn named_struct() {
|
|||||||
fn index() {
|
fn index() {
|
||||||
use ExpressionKind::*;
|
use ExpressionKind::*;
|
||||||
assert_expr!(
|
assert_expr!(
|
||||||
"armok[b,c]",
|
"a[b,c]",
|
||||||
expr(Index {
|
expr(Index {
|
||||||
indexee: bx(expr(Value(qn!(armok)))),
|
indexee: bx(expr(Value(qn!(a)))),
|
||||||
indexers: vec![expr(Value(qn!(b))), expr(Value(qn!(c)))]
|
indexers: vec![expr(Value(qn!(b))), expr(Value(qn!(c)))]
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
assert_expr!(
|
|
||||||
"a[b,c][1]",
|
|
||||||
expr(Index {
|
|
||||||
indexee: bx(expr(Index {
|
|
||||||
indexee: bx(expr(Value(qn!(a)))),
|
|
||||||
indexers: vec![expr(Value(qn!(b))), expr(Value(qn!(c)))]
|
|
||||||
})),
|
|
||||||
indexers: vec![expr(NatLiteral(1))]
|
|
||||||
})
|
|
||||||
);
|
|
||||||
assert_expr!(
|
assert_expr!(
|
||||||
"perspicacity()[a]",
|
"perspicacity()[a]",
|
||||||
expr(Index {
|
expr(Index {
|
||||||
@ -372,7 +357,7 @@ fn for_expression() {
|
|||||||
);
|
);
|
||||||
|
|
||||||
assert_expr!(
|
assert_expr!(
|
||||||
"for n <- someRange { f(n) ; }",
|
"for n <- someRange { f(n); }",
|
||||||
expr(ForExpression {
|
expr(ForExpression {
|
||||||
enumerators: vec![Enumerator { id: rc("n"), generator: expr(Value(qn!(someRange))) }],
|
enumerators: vec![Enumerator { id: rc("n"), generator: expr(Value(qn!(someRange))) }],
|
||||||
body: bx(ForBody::StatementBlock(
|
body: bx(ForBody::StatementBlock(
|
||||||
@ -480,9 +465,8 @@ fn single_param_lambda() {
|
|||||||
fn complex_lambdas() {
|
fn complex_lambdas() {
|
||||||
use ExpressionKind::*;
|
use ExpressionKind::*;
|
||||||
|
|
||||||
//TODO support this without the semicolon after the lambda
|
|
||||||
assert_ast! {
|
assert_ast! {
|
||||||
r#"fn wahoo() { let a = 10; \(x) { x + a }; }
|
r#"fn wahoo() { let a = 10; \(x) { x + a } };
|
||||||
wahoo()(3) "#,
|
wahoo()(3) "#,
|
||||||
vec![
|
vec![
|
||||||
fn_decl(Signature { name: rc("wahoo"), operator: false, type_anno: None, params: vec![] },
|
fn_decl(Signature { name: rc("wahoo"), operator: false, type_anno: None, params: vec![] },
|
||||||
@ -517,7 +501,7 @@ fn complex_lambdas() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn reserved_words() {
|
fn reserved_words() {
|
||||||
assert_fail!("module::item::call()", "error at 1:7: expected ['a' ..= 'z' | 'A' ..= 'Z' | '_']");
|
assert_fail!("module::item::call()", "Expected an identifier, got Colon");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -781,7 +765,7 @@ fn functions() {
|
|||||||
]
|
]
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
assert_fail!("a(b,,c)","error at 1:5: expected one of \"(\", \".\", \"0b\", \"0x\", \"[\", \"\\\"\", \"_\", \"false\", \"for\", \"if\", \"true\", \"while\", ['+' | '-' | '!'], ['0' ..= '9'], ['a' ..= 'z' | 'A' ..= 'Z' | '_'], r#\"\\\"#");
|
assert_fail!("a(b,,c)", "Expected a literal expression, got Comma");
|
||||||
|
|
||||||
assert_ast!(
|
assert_ast!(
|
||||||
"fn a(b, c: Int): Int",
|
"fn a(b, c: Int): Int",
|
||||||
@ -802,19 +786,6 @@ fn functions() {
|
|||||||
type_anno: Some(TypeIdentifier::Singleton(TypeSingletonName { name: rc("Int"), params: vec![] })),
|
type_anno: Some(TypeIdentifier::Singleton(TypeSingletonName { name: rc("Int"), params: vec![] })),
|
||||||
})))]
|
})))]
|
||||||
);
|
);
|
||||||
|
|
||||||
let source = r#"
|
|
||||||
fn some_function() {
|
|
||||||
|
|
||||||
}"#;
|
|
||||||
|
|
||||||
assert_ast!(
|
|
||||||
source,
|
|
||||||
vec![fn_decl(
|
|
||||||
Signature { name: rc("some_function"), operator: false, type_anno: None, params: vec![] },
|
|
||||||
vec![].into()
|
|
||||||
)]
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -824,9 +795,7 @@ fn max_function_params() {
|
|||||||
write!(buf, "a{}, ", n).unwrap();
|
write!(buf, "a{}, ", n).unwrap();
|
||||||
}
|
}
|
||||||
write!(buf, ") {{ return 20 }}").unwrap();
|
write!(buf, ") {{ return 20 }}").unwrap();
|
||||||
//assert_fail!(&buf, "A function cannot have more than 255 arguments");
|
assert_fail!(&buf, "A function cannot have more than 255 arguments");
|
||||||
//TODO better errors again
|
|
||||||
assert_fail!(&buf, "error at 1:1439: expected ['a' ..= 'z' | 'A' ..= 'Z' | '_']");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -914,43 +883,44 @@ fn interface() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn impls() {
|
fn impls() {
|
||||||
use Declaration::{FuncDecl, Impl};
|
use Declaration::{FuncSig, Impl};
|
||||||
|
|
||||||
let block = vec![
|
|
||||||
FuncDecl(
|
|
||||||
Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None },
|
|
||||||
vec![].into(),
|
|
||||||
),
|
|
||||||
FuncDecl(
|
|
||||||
Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None },
|
|
||||||
vec![].into(),
|
|
||||||
),
|
|
||||||
];
|
|
||||||
|
|
||||||
assert_ast!(
|
assert_ast!(
|
||||||
"impl Heh { fn yolo() { }; fn swagg() { } }",
|
"impl Heh { fn yolo(); fn swagg(); }",
|
||||||
vec![decl(Impl { type_name: ty_simple("Heh"), interface_name: None, block: block.clone() })]
|
vec![decl(Impl {
|
||||||
|
type_name: ty_simple("Heh"),
|
||||||
|
interface_name: None,
|
||||||
|
block: vec![
|
||||||
|
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
|
||||||
|
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
|
||||||
|
]
|
||||||
|
})]
|
||||||
);
|
);
|
||||||
|
|
||||||
//TODO `"impl Heh<X> { fn yolo() { }; fn swagg() { }; }"` ought to work
|
|
||||||
assert_ast!(
|
assert_ast!(
|
||||||
"impl Heh<X> { fn yolo() { }; fn swagg() { } }",
|
"impl Heh<X> { fn yolo(); fn swagg(); }",
|
||||||
vec![decl(Impl {
|
vec![decl(Impl {
|
||||||
type_name: TypeIdentifier::Singleton(TypeSingletonName {
|
type_name: TypeIdentifier::Singleton(TypeSingletonName {
|
||||||
name: rc("Heh"),
|
name: rc("Heh"),
|
||||||
params: vec![ty_simple("X")]
|
params: vec![ty_simple("X")]
|
||||||
}),
|
}),
|
||||||
interface_name: None,
|
interface_name: None,
|
||||||
block: block.clone(),
|
block: vec![
|
||||||
|
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
|
||||||
|
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
|
||||||
|
]
|
||||||
})]
|
})]
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_ast!(
|
assert_ast!(
|
||||||
"impl Heh for Saraz { fn yolo() {}; fn swagg() {} }",
|
"impl Heh for Saraz { fn yolo(); fn swagg(); }",
|
||||||
vec![decl(Impl {
|
vec![decl(Impl {
|
||||||
type_name: ty_simple("Saraz"),
|
type_name: ty_simple("Saraz"),
|
||||||
interface_name: Some(TypeSingletonName { name: rc("Heh"), params: vec![] }),
|
interface_name: Some(TypeSingletonName { name: rc("Heh"), params: vec![] }),
|
||||||
block: block.clone(),
|
block: vec![
|
||||||
|
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
|
||||||
|
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
|
||||||
|
]
|
||||||
})]
|
})]
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -1178,7 +1148,7 @@ fn pattern_matching() {
|
|||||||
);
|
);
|
||||||
|
|
||||||
assert_expr!(
|
assert_expr!(
|
||||||
"if x { is 1 then 5; else 20 }",
|
"if x { is 1 then 5, else 20 }",
|
||||||
expr(IfExpression {
|
expr(IfExpression {
|
||||||
discriminator: Some(bx(expr(Value(qn!(x))))),
|
discriminator: Some(bx(expr(Value(qn!(x))))),
|
||||||
body: bx(IfExpressionBody::CondList(vec![
|
body: bx(IfExpressionBody::CondList(vec![
|
||||||
@ -1214,7 +1184,7 @@ fn pattern_matching() {
|
|||||||
assert_expr! {
|
assert_expr! {
|
||||||
r#"
|
r#"
|
||||||
if (45, "panda", false, 2.2) {
|
if (45, "panda", false, 2.2) {
|
||||||
is (49, "pablo", _, 28.4) then "no"
|
is (49, "pablo", _, 28.4) then "no"
|
||||||
is (_, "panda", _, -2.2) then "yes"
|
is (_, "panda", _, -2.2) then "yes"
|
||||||
is _ then "maybe"
|
is _ then "maybe"
|
||||||
}"#,
|
}"#,
|
||||||
@ -1293,62 +1263,3 @@ fn flow_control() {
|
|||||||
)]
|
)]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn blocks() {
|
|
||||||
use ExpressionKind::*;
|
|
||||||
|
|
||||||
let cases = ["{ a }", "{ a; }", "{a}", "{ a\n }", "{ a\n\n }", "{ a;\n\n; }"];
|
|
||||||
|
|
||||||
let mut parser = Parser::new();
|
|
||||||
for case in cases.iter() {
|
|
||||||
let block = schala_parser::block(case, &mut parser);
|
|
||||||
assert_eq!(block.unwrap(), vec![exst(Value(qn!(a)))].into());
|
|
||||||
}
|
|
||||||
|
|
||||||
let source = r#"{
|
|
||||||
fn quah() {
|
|
||||||
fn foo() { }
|
|
||||||
}
|
|
||||||
}"#;
|
|
||||||
let block = schala_parser::block(source, &mut parser);
|
|
||||||
assert_eq!(
|
|
||||||
block.unwrap(),
|
|
||||||
vec![decl(Declaration::FuncDecl(
|
|
||||||
Signature { name: rc("quah"), operator: false, params: vec![], type_anno: None },
|
|
||||||
vec![decl(Declaration::FuncDecl(
|
|
||||||
Signature { name: rc("foo"), operator: false, params: vec![], type_anno: None },
|
|
||||||
vec![].into(),
|
|
||||||
))]
|
|
||||||
.into()
|
|
||||||
))]
|
|
||||||
.into()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn comments() {
|
|
||||||
use ExpressionKind::*;
|
|
||||||
|
|
||||||
let source = "1 + /* hella /* bro */ */ 2";
|
|
||||||
assert_expr!(source, binop("+", expr(NatLiteral(1)), expr(NatLiteral(2))));
|
|
||||||
|
|
||||||
//TODO make sure this error message makes sense
|
|
||||||
let source = "1 + /* hella /* bro */ 2";
|
|
||||||
assert_fail_expr!(source, "foo");
|
|
||||||
|
|
||||||
let source = "1 + /* hella */ bro */ 2";
|
|
||||||
assert_fail_expr!(source, binop("+", expr(NatLiteral(1)), expr(NatLiteral(2))));
|
|
||||||
|
|
||||||
let source = "5//no man\n";
|
|
||||||
assert_ast!(source, vec![exst(NatLiteral(5))]);
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO support backtick operators like this
|
|
||||||
/*
|
|
||||||
#[test]
|
|
||||||
fn backtick_operators() {
|
|
||||||
let output = token_kinds("1 `plus` 2");
|
|
||||||
assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
@ -247,6 +247,8 @@ impl<'a, 'b> Reducer<'a, 'b> {
|
|||||||
let mut alternatives = vec![];
|
let mut alternatives = vec![];
|
||||||
for arm in condition_arms {
|
for arm in condition_arms {
|
||||||
match arm.condition {
|
match arm.condition {
|
||||||
|
ast::Condition::Expression(ref _expr) =>
|
||||||
|
return Expression::ReductionError("case-expression".to_string()),
|
||||||
ast::Condition::Pattern(ref pat) => {
|
ast::Condition::Pattern(ref pat) => {
|
||||||
let alt = Alternative {
|
let alt = Alternative {
|
||||||
pattern: match pat.reduce(self.symbol_table) {
|
pattern: match pat.reduce(self.symbol_table) {
|
||||||
|
@ -5,7 +5,7 @@ use schala_repl::{
|
|||||||
use stopwatch::Stopwatch;
|
use stopwatch::Stopwatch;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::SchalaError, parsing, reduced_ir, symbol_table, tree_walk_eval, type_inference,
|
error::SchalaError, parsing, reduced_ir, symbol_table, tokenizing, tree_walk_eval, type_inference,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// All the state necessary to parse and execute a Schala program are stored in this struct.
|
/// All the state necessary to parse and execute a Schala program are stored in this struct.
|
||||||
@ -19,7 +19,7 @@ pub struct Schala<'a> {
|
|||||||
/// Contains information for type-checking
|
/// Contains information for type-checking
|
||||||
type_context: type_inference::TypeContext,
|
type_context: type_inference::TypeContext,
|
||||||
/// Schala Parser
|
/// Schala Parser
|
||||||
active_parser: parsing::new::Parser,
|
active_parser: parsing::Parser,
|
||||||
|
|
||||||
/// Execution state for AST-walking interpreter
|
/// Execution state for AST-walking interpreter
|
||||||
eval_state: tree_walk_eval::State<'a>,
|
eval_state: tree_walk_eval::State<'a>,
|
||||||
@ -47,7 +47,7 @@ impl<'a> Schala<'a> {
|
|||||||
source_reference: SourceReference::new(),
|
source_reference: SourceReference::new(),
|
||||||
symbol_table: symbol_table::SymbolTable::new(),
|
symbol_table: symbol_table::SymbolTable::new(),
|
||||||
type_context: type_inference::TypeContext::new(),
|
type_context: type_inference::TypeContext::new(),
|
||||||
active_parser: parsing::new::Parser::new(),
|
active_parser: parsing::Parser::new(),
|
||||||
eval_state: tree_walk_eval::State::new(),
|
eval_state: tree_walk_eval::State::new(),
|
||||||
timings: Vec::new(),
|
timings: Vec::new(),
|
||||||
}
|
}
|
||||||
@ -74,10 +74,18 @@ impl<'a> Schala<'a> {
|
|||||||
self.timings = vec![];
|
self.timings = vec![];
|
||||||
let sw = Stopwatch::start_new();
|
let sw = Stopwatch::start_new();
|
||||||
|
|
||||||
self.source_reference.load_new_source(source);
|
// 1st stage - tokenization
|
||||||
|
// TODO tokenize should return its own error type
|
||||||
|
let tokens = tokenizing::tokenize(source);
|
||||||
|
if let Some(err) = SchalaError::from_tokens(&tokens) {
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
//2nd stage - parsing
|
||||||
|
self.active_parser.add_new_tokens(tokens);
|
||||||
let ast = self
|
let ast = self
|
||||||
.active_parser
|
.active_parser
|
||||||
.parse(source)
|
.parse()
|
||||||
.map_err(|err| SchalaError::from_parse_error(err, &self.source_reference))?;
|
.map_err(|err| SchalaError::from_parse_error(err, &self.source_reference))?;
|
||||||
self.timings.push(("parsing", sw.elapsed()));
|
self.timings.push(("parsing", sw.elapsed()));
|
||||||
|
|
||||||
@ -114,50 +122,31 @@ impl<'a> Schala<'a> {
|
|||||||
|
|
||||||
/// Represents lines of source code
|
/// Represents lines of source code
|
||||||
pub(crate) struct SourceReference {
|
pub(crate) struct SourceReference {
|
||||||
last_source: Option<String>,
|
lines: Option<Vec<String>>,
|
||||||
/// Offsets in *bytes* (not chars) representing a newline character
|
|
||||||
newline_offsets: Vec<usize>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SourceReference {
|
impl SourceReference {
|
||||||
pub(crate) fn new() -> SourceReference {
|
fn new() -> SourceReference {
|
||||||
SourceReference { last_source: None, newline_offsets: vec![] }
|
SourceReference { lines: None }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn load_new_source(&mut self, source: &str) {
|
fn load_new_source(&mut self, source: &str) {
|
||||||
self.newline_offsets = vec![];
|
//TODO this is a lot of heap allocations - maybe there's a way to make it more efficient?
|
||||||
for (offset, ch) in source.as_bytes().iter().enumerate() {
|
self.lines = Some(source.lines().map(|s| s.to_string()).collect());
|
||||||
if *ch == ('\n' as u8) {
|
|
||||||
self.newline_offsets.push(offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.last_source = Some(source.to_string());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// (line_start, line_num, the string itself)
|
pub fn get_line(&self, line: usize) -> String {
|
||||||
pub fn get_line(&self, line: usize) -> (usize, usize, String) {
|
self.lines
|
||||||
if self.newline_offsets.is_empty() {
|
.as_ref()
|
||||||
return (0, 0, self.last_source.as_ref().cloned().unwrap());
|
.and_then(|x| x.get(line).map(|s| s.to_string()))
|
||||||
}
|
.unwrap_or_else(|| "NO LINE FOUND".to_string())
|
||||||
|
|
||||||
//TODO make sure this is utf8-safe
|
|
||||||
let start_idx = match self.newline_offsets.binary_search(&line) {
|
|
||||||
Ok(index) | Err(index) => index,
|
|
||||||
};
|
|
||||||
|
|
||||||
let last_source = self.last_source.as_ref().unwrap();
|
|
||||||
|
|
||||||
let start = self.newline_offsets[start_idx];
|
|
||||||
let end = self.newline_offsets.get(start_idx + 1).cloned().unwrap_or_else(|| last_source.len());
|
|
||||||
|
|
||||||
let slice = &last_source.as_bytes()[start..end];
|
|
||||||
(start, start_idx, std::str::from_utf8(slice).unwrap().to_string())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
pub(crate) enum Stage {
|
pub(crate) enum Stage {
|
||||||
|
Tokenizing,
|
||||||
Parsing,
|
Parsing,
|
||||||
Symbols,
|
Symbols,
|
||||||
ScopeResolution,
|
ScopeResolution,
|
||||||
@ -167,7 +156,7 @@ pub(crate) enum Stage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn stage_names() -> Vec<&'static str> {
|
fn stage_names() -> Vec<&'static str> {
|
||||||
vec!["parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
|
vec!["tokenizing", "parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Clone)]
|
#[derive(Default, Clone)]
|
||||||
@ -188,6 +177,7 @@ impl<'a> ProgrammingLanguageInterface for Schala<'a> {
|
|||||||
|
|
||||||
fn run_computation(&mut self, request: ComputationRequest<Self::Config>) -> ComputationResponse {
|
fn run_computation(&mut self, request: ComputationRequest<Self::Config>) -> ComputationResponse {
|
||||||
let ComputationRequest { source, debug_requests: _, config: _ } = request;
|
let ComputationRequest { source, debug_requests: _, config: _ } = request;
|
||||||
|
self.source_reference.load_new_source(source);
|
||||||
let sw = Stopwatch::start_new();
|
let sw = Stopwatch::start_new();
|
||||||
|
|
||||||
let main_output =
|
let main_output =
|
||||||
|
@ -10,7 +10,7 @@ use crate::{
|
|||||||
ast,
|
ast,
|
||||||
ast::ItemId,
|
ast::ItemId,
|
||||||
builtin::Builtin,
|
builtin::Builtin,
|
||||||
parsing::Location,
|
tokenizing::Location,
|
||||||
type_inference::{TypeContext, TypeId},
|
type_inference::{TypeContext, TypeId},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ use crate::{
|
|||||||
TypeSingletonName, Variant, VariantKind, AST,
|
TypeSingletonName, Variant, VariantKind, AST,
|
||||||
},
|
},
|
||||||
builtin::Builtin,
|
builtin::Builtin,
|
||||||
parsing::Location,
|
tokenizing::Location,
|
||||||
type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder},
|
type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
use assert_matches::assert_matches;
|
use assert_matches::assert_matches;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::util::quick_ast;
|
use crate::{tokenizing::Location, util::quick_ast};
|
||||||
|
|
||||||
fn add_symbols(src: &str) -> (SymbolTable, Result<(), Vec<SymbolError>>) {
|
fn add_symbols(src: &str) -> (SymbolTable, Result<(), Vec<SymbolError>>) {
|
||||||
let ast = quick_ast(src);
|
let ast = quick_ast(src);
|
||||||
@ -79,11 +79,9 @@ fn no_type_definition_duplicates() {
|
|||||||
let err = &errs[0];
|
let err = &errs[0];
|
||||||
|
|
||||||
match err {
|
match err {
|
||||||
SymbolError::DuplicateName { location: _, prev_name } => {
|
SymbolError::DuplicateName { location, prev_name } => {
|
||||||
assert_eq!(prev_name, &Fqsn::from_strs(&["Food"]));
|
assert_eq!(prev_name, &Fqsn::from_strs(&["Food"]));
|
||||||
|
assert_eq!(location, &Location { line_num: 2, char_num: 2 });
|
||||||
//TODO restore this Location test
|
|
||||||
//assert_eq!(location, &Location { line_num: 2, char_num: 2 });
|
|
||||||
}
|
}
|
||||||
_ => panic!(),
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
@ -137,7 +135,7 @@ fn dont_falsely_detect_duplicates() {
|
|||||||
let a = 40;
|
let a = 40;
|
||||||
77
|
77
|
||||||
}
|
}
|
||||||
let q = 39
|
let q = 39;
|
||||||
"#;
|
"#;
|
||||||
let (symbols, _) = add_symbols(source);
|
let (symbols, _) = add_symbols(source);
|
||||||
|
|
||||||
@ -173,8 +171,7 @@ fn second_inner_func() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inner_func(x)
|
inner_func(x)
|
||||||
}
|
}"#;
|
||||||
"#;
|
|
||||||
let (symbols, _) = add_symbols(source);
|
let (symbols, _) = add_symbols(source);
|
||||||
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func"])).is_some());
|
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func"])).is_some());
|
||||||
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func", "inner_func"])).is_some());
|
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func", "inner_func"])).is_some());
|
||||||
@ -190,8 +187,7 @@ inner_func(x)
|
|||||||
fn enclosing_scopes_3() {
|
fn enclosing_scopes_3() {
|
||||||
let source = r#"
|
let source = r#"
|
||||||
fn outer_func(x) {
|
fn outer_func(x) {
|
||||||
|
fn inner_func(arg) {
|
||||||
fn inner_func(arg) {
|
|
||||||
arg
|
arg
|
||||||
}
|
}
|
||||||
|
|
||||||
|
460
schala-lang/language/src/tokenizing.rs
Normal file
460
schala-lang/language/src/tokenizing.rs
Normal file
@ -0,0 +1,460 @@
|
|||||||
|
#![allow(clippy::upper_case_acronyms)]
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
convert::{TryFrom, TryInto},
|
||||||
|
fmt,
|
||||||
|
iter::{Iterator, Peekable},
|
||||||
|
rc::Rc,
|
||||||
|
};
|
||||||
|
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
/// A position inside a particular source file.
///
/// The widths of the integer fields bound the size of a source file that can
/// be addressed: at most 2^32 lines, each of at most 2^16 characters, which
/// should be plenty big.
///
/// `Eq` is derived alongside `PartialEq` because equality of two integer
/// fields is total; this lets `Location` be used wherever a full equivalence
/// relation is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Location {
    /// Index of the line.
    pub(crate) line_num: u32,
    /// Index of the character within its line.
    pub(crate) char_num: u16,
}
|
||||||
|
|
||||||
|
impl fmt::Display for Location {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "{}:{}", self.line_num, self.char_num)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
|
pub enum TokenKind {
|
||||||
|
Newline,
|
||||||
|
Semicolon,
|
||||||
|
|
||||||
|
LParen,
|
||||||
|
RParen,
|
||||||
|
LSquareBracket,
|
||||||
|
RSquareBracket,
|
||||||
|
LAngleBracket,
|
||||||
|
RAngleBracket,
|
||||||
|
LCurlyBrace,
|
||||||
|
RCurlyBrace,
|
||||||
|
Pipe,
|
||||||
|
Backslash,
|
||||||
|
AtSign,
|
||||||
|
|
||||||
|
Comma,
|
||||||
|
Period,
|
||||||
|
Colon,
|
||||||
|
Underscore,
|
||||||
|
Slash,
|
||||||
|
Equals,
|
||||||
|
|
||||||
|
Operator(Rc<String>),
|
||||||
|
DigitGroup(Rc<String>),
|
||||||
|
HexLiteral(Rc<String>),
|
||||||
|
BinNumberSigil,
|
||||||
|
StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
|
||||||
|
Identifier(Rc<String>),
|
||||||
|
Keyword(Kw),
|
||||||
|
|
||||||
|
EOF,
|
||||||
|
|
||||||
|
Error(String),
|
||||||
|
}
|
||||||
|
use self::TokenKind::*;
|
||||||
|
|
||||||
|
impl fmt::Display for TokenKind {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
&Operator(ref s) => write!(f, "Operator({})", **s),
|
||||||
|
&DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
|
||||||
|
&HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
|
||||||
|
&StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s),
|
||||||
|
&Identifier(ref s) => write!(f, "Identifier({})", s),
|
||||||
|
&Error(ref s) => write!(f, "Error({})", s),
|
||||||
|
other => write!(f, "{:?}", other),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The reserved words of the language.
///
/// `Eq` is derived alongside `PartialEq`: comparing field-less variants is a
/// total equivalence, so there is no reason to withhold it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kw {
    /// `if`
    If,
    /// `then`
    Then,
    /// `else`
    Else,
    /// `is`
    Is,
    /// `fn`
    Func,
    /// `for`
    For,
    /// `while`
    While,
    /// `let`
    Let,
    /// `in`
    In,
    /// `mut`
    Mut,
    /// `return`
    Return,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `alias`
    Alias,
    /// `type`
    Type,
    /// `Self`
    SelfType,
    /// `self`
    SelfIdent,
    /// `interface`
    Interface,
    /// `impl`
    Impl,
    /// `true`
    True,
    /// `false`
    False,
    /// `module`
    Module,
    /// `import`
    Import,
}
|
||||||
|
|
||||||
|
impl TryFrom<&str> for Kw {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||||
|
Ok(match value {
|
||||||
|
"if" => Kw::If,
|
||||||
|
"then" => Kw::Then,
|
||||||
|
"else" => Kw::Else,
|
||||||
|
"is" => Kw::Is,
|
||||||
|
"fn" => Kw::Func,
|
||||||
|
"for" => Kw::For,
|
||||||
|
"while" => Kw::While,
|
||||||
|
"let" => Kw::Let,
|
||||||
|
"in" => Kw::In,
|
||||||
|
"mut" => Kw::Mut,
|
||||||
|
"return" => Kw::Return,
|
||||||
|
"break" => Kw::Break,
|
||||||
|
"continue" => Kw::Continue,
|
||||||
|
"alias" => Kw::Alias,
|
||||||
|
"type" => Kw::Type,
|
||||||
|
"Self" => Kw::SelfType,
|
||||||
|
"self" => Kw::SelfIdent,
|
||||||
|
"interface" => Kw::Interface,
|
||||||
|
"impl" => Kw::Impl,
|
||||||
|
"true" => Kw::True,
|
||||||
|
"false" => Kw::False,
|
||||||
|
"module" => Kw::Module,
|
||||||
|
"import" => Kw::Import,
|
||||||
|
_ => return Err(()),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct Token {
|
||||||
|
pub kind: TokenKind,
|
||||||
|
pub(crate) location: Location,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Token {
|
||||||
|
pub fn to_string_with_metadata(&self) -> String {
|
||||||
|
format!("{}({})", self.kind, self.location)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_kind(&self) -> TokenKind {
|
||||||
|
self.kind.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Characters that may appear in an operator token. Note that `/` is absent:
/// a slash is not an operator character.
const OPERATOR_CHARS: [char; 17] =
    ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];

/// Returns `true` if `c` is one of [`OPERATOR_CHARS`].
fn is_operator(c: &char) -> bool {
    // `slice::contains` states the membership test directly instead of a
    // hand-written `iter().any(..)` comparison.
    OPERATOR_CHARS.contains(c)
}
|
||||||
|
|
||||||
|
type CharData = (usize, usize, char);
|
||||||
|
|
||||||
|
pub fn tokenize(input: &str) -> Vec<Token> {
|
||||||
|
let mut tokens: Vec<Token> = Vec::new();
|
||||||
|
|
||||||
|
let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
|
||||||
|
.flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch)))
|
||||||
|
.peekable();
|
||||||
|
|
||||||
|
while let Some((line_num, char_num, c)) = input.next() {
|
||||||
|
let cur_tok_kind = match c {
|
||||||
|
'/' => match input.peek().map(|t| t.2) {
|
||||||
|
Some('/') => {
|
||||||
|
for (_, _, c) in input.by_ref() {
|
||||||
|
if c == '\n' {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Some('*') => {
|
||||||
|
input.next();
|
||||||
|
let mut comment_level = 1;
|
||||||
|
while let Some((_, _, c)) = input.next() {
|
||||||
|
if c == '*' && input.peek().map(|t| t.2) == Some('/') {
|
||||||
|
input.next();
|
||||||
|
comment_level -= 1;
|
||||||
|
} else if c == '/' && input.peek().map(|t| t.2) == Some('*') {
|
||||||
|
input.next();
|
||||||
|
comment_level += 1;
|
||||||
|
}
|
||||||
|
if comment_level == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if comment_level != 0 {
|
||||||
|
Error("Unclosed comment".to_string())
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => Slash,
|
||||||
|
},
|
||||||
|
c if c.is_whitespace() && c != '\n' => continue,
|
||||||
|
'\n' => Newline,
|
||||||
|
';' => Semicolon,
|
||||||
|
':' => Colon,
|
||||||
|
',' => Comma,
|
||||||
|
'(' => LParen,
|
||||||
|
')' => RParen,
|
||||||
|
'{' => LCurlyBrace,
|
||||||
|
'}' => RCurlyBrace,
|
||||||
|
'[' => LSquareBracket,
|
||||||
|
']' => RSquareBracket,
|
||||||
|
'"' => handle_quote(&mut input, None),
|
||||||
|
'\\' => Backslash,
|
||||||
|
'@' => AtSign,
|
||||||
|
c if c.is_digit(10) => handle_digit(c, &mut input),
|
||||||
|
c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input),
|
||||||
|
c if is_operator(&c) => handle_operator(c, &mut input),
|
||||||
|
unknown => Error(format!("Unexpected character: {}", unknown)),
|
||||||
|
};
|
||||||
|
let location =
|
||||||
|
Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
|
||||||
|
tokens.push(Token { kind: cur_tok_kind, location });
|
||||||
|
}
|
||||||
|
tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
|
||||||
|
let next_ch = input.peek().map(|&(_, _, c)| c);
|
||||||
|
|
||||||
|
if c == '0' && next_ch == Some('x') {
|
||||||
|
input.next();
|
||||||
|
let rest: String = input
|
||||||
|
.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
|
||||||
|
.map(|(_, _, c)| c)
|
||||||
|
.collect();
|
||||||
|
HexLiteral(Rc::new(rest))
|
||||||
|
} else if c == '0' && next_ch == Some('b') {
|
||||||
|
input.next();
|
||||||
|
BinNumberSigil
|
||||||
|
} else {
|
||||||
|
let mut buf = c.to_string();
|
||||||
|
buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
|
||||||
|
DigitGroup(Rc::new(buf))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_quote(
|
||||||
|
input: &mut Peekable<impl Iterator<Item = CharData>>,
|
||||||
|
quote_prefix: Option<&str>,
|
||||||
|
) -> TokenKind {
|
||||||
|
let mut buf = String::new();
|
||||||
|
loop {
|
||||||
|
match input.next().map(|(_, _, c)| c) {
|
||||||
|
Some('"') => break,
|
||||||
|
Some('\\') => {
|
||||||
|
let next = input.peek().map(|&(_, _, c)| c);
|
||||||
|
if next == Some('n') {
|
||||||
|
input.next();
|
||||||
|
buf.push('\n')
|
||||||
|
} else if next == Some('"') {
|
||||||
|
input.next();
|
||||||
|
buf.push('"');
|
||||||
|
} else if next == Some('t') {
|
||||||
|
input.next();
|
||||||
|
buf.push('\t');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(c) => buf.push(c),
|
||||||
|
None => return TokenKind::Error("Unclosed string".to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
|
||||||
|
let mut buf = String::new();
|
||||||
|
buf.push(c);
|
||||||
|
let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
|
||||||
|
if c == '_' && next_is_alphabetic {
|
||||||
|
return TokenKind::Underscore;
|
||||||
|
}
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match input.peek().map(|&(_, _, c)| c) {
|
||||||
|
Some(c) if c == '"' => {
|
||||||
|
input.next();
|
||||||
|
return handle_quote(input, Some(&buf));
|
||||||
|
}
|
||||||
|
Some(c) if c.is_alphanumeric() || c == '_' => {
|
||||||
|
input.next();
|
||||||
|
buf.push(c);
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match Kw::try_from(buf.as_str()) {
|
||||||
|
Ok(kw) => TokenKind::Keyword(kw),
|
||||||
|
Err(()) => TokenKind::Identifier(Rc::new(buf)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
|
||||||
|
match c {
|
||||||
|
'<' | '>' | '|' | '.' | '=' => {
|
||||||
|
let next = &input.peek().map(|&(_, _, c)| c);
|
||||||
|
let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
|
||||||
|
if !next_is_op {
|
||||||
|
return match c {
|
||||||
|
'<' => LAngleBracket,
|
||||||
|
'>' => RAngleBracket,
|
||||||
|
'|' => Pipe,
|
||||||
|
'.' => Period,
|
||||||
|
'=' => Equals,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut buf = String::new();
|
||||||
|
|
||||||
|
if c == '`' {
|
||||||
|
loop {
|
||||||
|
match input.peek().map(|&(_, _, c)| c) {
|
||||||
|
Some(c) if c.is_alphabetic() || c == '_' => {
|
||||||
|
input.next();
|
||||||
|
buf.push(c);
|
||||||
|
}
|
||||||
|
Some('`') => {
|
||||||
|
input.next();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buf.push(c);
|
||||||
|
loop {
|
||||||
|
match input.peek().map(|&(_, _, c)| c) {
|
||||||
|
Some(c) if is_operator(&c) => {
|
||||||
|
input.next();
|
||||||
|
buf.push(c);
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TokenKind::Operator(Rc::new(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod schala_tokenizer_tests {
    use super::{Kw::*, *};

    /// Builds the `DigitGroup` token expected for `text`.
    fn digit(text: &str) -> TokenKind {
        DigitGroup(Rc::new(text.to_string()))
    }

    /// Builds the `Identifier` token expected for `text`.
    fn ident(text: &str) -> TokenKind {
        Identifier(Rc::new(text.to_string()))
    }

    /// Builds the `Operator` token expected for `text`.
    fn op(text: &str) -> TokenKind {
        Operator(Rc::new(text.to_string()))
    }

    /// Builds the `StrLiteral` token expected for `text` with an optional prefix.
    fn str_lit(text: &str, prefix: Option<&str>) -> TokenKind {
        StrLiteral { s: Rc::new(text.to_string()), prefix: prefix.map(|p| Rc::new(p.to_string())) }
    }

    /// Tokenizes `input` and keeps only the token kinds, dropping locations.
    fn token_kinds(input: &str) -> Vec<TokenKind> {
        tokenize(input).into_iter().map(|tok| tok.kind).collect()
    }

    #[test]
    fn tokens() {
        let expected = vec![
            Keyword(Let),
            ident("a"),
            Colon,
            ident("A"),
            LAngleBracket,
            ident("B"),
            RAngleBracket,
            Equals,
            ident("c"),
            op("++"),
            ident("d"),
        ];
        assert_eq!(token_kinds("let a: A<B> = c ++ d"), expected);
    }

    #[test]
    fn underscores() {
        assert_eq!(token_kinds("4_8"), vec![digit("4"), Underscore, digit("8")]);
        assert_eq!(token_kinds("aba_yo"), vec![ident("aba_yo")]);
    }

    #[test]
    fn comments() {
        // Properly nested block comment disappears entirely.
        assert_eq!(
            token_kinds("1 + /* hella /* bro */ */ 2"),
            vec![digit("1"), op("+"), digit("2")]
        );

        // Inner comment closed, outer one left open.
        assert_eq!(
            token_kinds("1 + /* hella /* bro */ 2"),
            vec![digit("1"), op("+"), Error("Unclosed comment".to_string())]
        );

        //TODO not sure if I want this behavior
        assert_eq!(
            token_kinds("1 + /* hella */ bro */ 2"),
            vec![digit("1"), op("+"), ident("bro"), op("*"), Slash, digit("2")]
        );
    }

    #[test]
    fn backtick_operators() {
        assert_eq!(token_kinds("1 `plus` 2"), vec![digit("1"), op("plus"), digit("2")]);
    }

    #[test]
    fn string_literals() {
        assert_eq!(token_kinds(r#""some string""#), vec![str_lit("some string", None)]);

        assert_eq!(
            token_kinds(r#"b"some bytestring""#),
            vec![str_lit("some bytestring", Some("b"))]
        );

        assert_eq!(
            token_kinds(r#""Do \n \" escapes work\t""#),
            vec![str_lit("Do \n \" escapes work\t", None)]
        );
    }
}
|
@ -43,7 +43,7 @@ fn test_basic_eval() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn op_eval() {
|
fn op_eval() {
|
||||||
eval_assert("-13", "-13");
|
eval_assert("- 13", "-13");
|
||||||
eval_assert("10 - 2", "8");
|
eval_assert("10 - 2", "8");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,7 +96,7 @@ trad()"#,
|
|||||||
);
|
);
|
||||||
|
|
||||||
let err =
|
let err =
|
||||||
"No symbol found for name: QualifiedName { id: Id { idx: 9, t: PhantomData }, components: [\"a\"] }";
|
"No symbol found for name: QualifiedName { id: Id { idx: 4, t: PhantomData }, components: [\"a\"] }";
|
||||||
|
|
||||||
eval_assert_failure(
|
eval_assert_failure(
|
||||||
r#"
|
r#"
|
||||||
@ -271,26 +271,26 @@ fn full_if_matching() {
|
|||||||
let source = r#"
|
let source = r#"
|
||||||
type Option<T> = Some(T) | None
|
type Option<T> = Some(T) | None
|
||||||
let a = Option::None
|
let a = Option::None
|
||||||
if a { is Option::None then 4; is Option::Some(x) then x }
|
if a { is Option::None then 4, is Option::Some(x) then x }
|
||||||
"#;
|
"#;
|
||||||
eval_assert(source, "4");
|
eval_assert(source, "4");
|
||||||
|
|
||||||
let source = r#"
|
let source = r#"
|
||||||
type Option<T> = Some(T) | None
|
type Option<T> = Some(T) | None
|
||||||
let sara = Option::Some(99)
|
let sara = Option::Some(99)
|
||||||
if sara { is Option::None then 1 + 3; is Option::Some(x) then x }
|
if sara { is Option::None then 1 + 3, is Option::Some(x) then x }
|
||||||
"#;
|
"#;
|
||||||
eval_assert(source, "99");
|
eval_assert(source, "99");
|
||||||
|
|
||||||
let source = r#"
|
let source = r#"
|
||||||
let a = 10
|
let a = 10
|
||||||
if a { is 10 then "x"; is 4 then "y" }
|
if a { is 10 then "x", is 4 then "y" }
|
||||||
"#;
|
"#;
|
||||||
eval_assert(source, "\"x\"");
|
eval_assert(source, "\"x\"");
|
||||||
|
|
||||||
let source = r#"
|
let source = r#"
|
||||||
let a = 10
|
let a = 10
|
||||||
if a { is 15 then "x"; is 10 then "y" }
|
if a { is 15 then "x", is 10 then "y" }
|
||||||
"#;
|
"#;
|
||||||
eval_assert(source, "\"y\"");
|
eval_assert(source, "\"y\"");
|
||||||
}
|
}
|
||||||
@ -300,7 +300,7 @@ if a { is 15 then "x"; is 10 then "y" }
|
|||||||
fn string_pattern() {
|
fn string_pattern() {
|
||||||
let source = r#"
|
let source = r#"
|
||||||
let a = "foo"
|
let a = "foo"
|
||||||
if a { is "foo" then "x"; is _ then "y" }
|
if a { is "foo" then "x", is _ then "y" }
|
||||||
"#;
|
"#;
|
||||||
eval_assert(source, "\"x\"");
|
eval_assert(source, "\"x\"");
|
||||||
}
|
}
|
||||||
@ -310,7 +310,7 @@ fn boolean_pattern() {
|
|||||||
let source = r#"
|
let source = r#"
|
||||||
let a = true
|
let a = true
|
||||||
if a {
|
if a {
|
||||||
is true then "x"
|
is true then "x",
|
||||||
is false then "y"
|
is false then "y"
|
||||||
}
|
}
|
||||||
"#;
|
"#;
|
||||||
@ -321,7 +321,7 @@ if a {
|
|||||||
fn boolean_pattern_2() {
|
fn boolean_pattern_2() {
|
||||||
let source = r#"
|
let source = r#"
|
||||||
let a = false
|
let a = false
|
||||||
if a { is true then "x"; is false then "y" }
|
if a { is true then "x", is false then "y" }
|
||||||
"#;
|
"#;
|
||||||
eval_assert(source, "\"y\"");
|
eval_assert(source, "\"y\"");
|
||||||
}
|
}
|
||||||
@ -341,7 +341,7 @@ if Option::Some(10) {
|
|||||||
fn tuple_pattern() {
|
fn tuple_pattern() {
|
||||||
let source = r#"
|
let source = r#"
|
||||||
if (1, 2) {
|
if (1, 2) {
|
||||||
is (1, x) then x;
|
is (1, x) then x,
|
||||||
is _ then 99
|
is _ then 99
|
||||||
}
|
}
|
||||||
"#;
|
"#;
|
||||||
@ -352,7 +352,7 @@ if (1, 2) {
|
|||||||
fn tuple_pattern_2() {
|
fn tuple_pattern_2() {
|
||||||
let source = r#"
|
let source = r#"
|
||||||
if (1, 2) {
|
if (1, 2) {
|
||||||
is (10, x) then x
|
is (10, x) then x,
|
||||||
is (y, x) then x + y
|
is (y, x) then x + y
|
||||||
}
|
}
|
||||||
"#;
|
"#;
|
||||||
@ -363,7 +363,7 @@ if (1, 2) {
|
|||||||
fn tuple_pattern_3() {
|
fn tuple_pattern_3() {
|
||||||
let source = r#"
|
let source = r#"
|
||||||
if (1, 5) {
|
if (1, 5) {
|
||||||
is (10, x) then x
|
is (10, x) then x,
|
||||||
is (1, x) then x
|
is (1, x) then x
|
||||||
}
|
}
|
||||||
"#;
|
"#;
|
||||||
@ -374,8 +374,8 @@ if (1, 5) {
|
|||||||
fn tuple_pattern_4() {
|
fn tuple_pattern_4() {
|
||||||
let source = r#"
|
let source = r#"
|
||||||
if (1, 5) {
|
if (1, 5) {
|
||||||
is (10, x) then x
|
is (10, x) then x,
|
||||||
is (1, x) then x
|
is (1, x) then x,
|
||||||
}
|
}
|
||||||
"#;
|
"#;
|
||||||
eval_assert(source, "5");
|
eval_assert(source, "5");
|
||||||
@ -390,21 +390,21 @@ let b = Stuff::Jugs(1, "haha")
|
|||||||
let c = Stuff::Mardok
|
let c = Stuff::Mardok
|
||||||
|
|
||||||
let x = if a {
|
let x = if a {
|
||||||
is Stuff::Mulch(20) then "x"
|
is Stuff::Mulch(20) then "x",
|
||||||
is _ then "ERR"
|
is _ then "ERR"
|
||||||
}
|
}
|
||||||
|
|
||||||
let y = if b {
|
let y = if b {
|
||||||
is Stuff::Mulch(n) then "ERR"
|
is Stuff::Mulch(n) then "ERR",
|
||||||
is Stuff::Jugs(2, _) then "ERR"
|
is Stuff::Jugs(2, _) then "ERR",
|
||||||
is Stuff::Jugs(1, s) then s
|
is Stuff::Jugs(1, s) then s,
|
||||||
is _ then "ERR"
|
is _ then "ERR",
|
||||||
}
|
}
|
||||||
|
|
||||||
let z = if c {
|
let z = if c {
|
||||||
is Stuff::Jugs(_, _) then "ERR"
|
is Stuff::Jugs(_, _) then "ERR",
|
||||||
is Stuff::Mardok then "NIGH"
|
is Stuff::Mardok then "NIGH",
|
||||||
is _ then "ERR"
|
is _ then "ERR",
|
||||||
}
|
}
|
||||||
|
|
||||||
(x, y, z)
|
(x, y, z)
|
||||||
|
@ -52,8 +52,10 @@ where T: Hash + Eq
|
|||||||
/// Quickly create an AST from a string, with no error checking. For test use only
|
/// Quickly create an AST from a string, with no error checking. For test use only
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub fn quick_ast(input: &str) -> crate::ast::AST {
|
pub fn quick_ast(input: &str) -> crate::ast::AST {
|
||||||
let mut parser = crate::parsing::new::Parser::new();
|
let tokens = crate::tokenizing::tokenize(input);
|
||||||
let output = parser.parse(input);
|
let mut parser = crate::parsing::Parser::new();
|
||||||
|
parser.add_new_tokens(tokens);
|
||||||
|
let output = parser.parse();
|
||||||
output.unwrap()
|
output.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ x is Some(t) // type bool
|
|||||||
|
|
||||||
if x {
|
if x {
|
||||||
is Some(t) => {
|
is Some(t) => {
|
||||||
}
|
},
|
||||||
is None => {
|
is None => {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user