Compare commits
No commits in common. "94ee3e18970b46906682cd1e241d16ab7f6fafcb" and "8a9c63eccf45d1ff3c4950e45900fecfb7f05f90" have entirely different histories.
94ee3e1897
...
8a9c63eccf
25
Cargo.lock
generated
25
Cargo.lock
generated
@ -558,30 +558,6 @@ dependencies = [
|
||||
"winapi 0.3.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peg"
|
||||
version = "0.7.0"
|
||||
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
|
||||
dependencies = [
|
||||
"peg-macros",
|
||||
"peg-runtime",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peg-macros"
|
||||
version = "0.7.0"
|
||||
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
|
||||
dependencies = [
|
||||
"peg-runtime",
|
||||
"proc-macro2 1.0.30",
|
||||
"quote 1.0.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peg-runtime"
|
||||
version = "0.7.0"
|
||||
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.7.24"
|
||||
@ -919,7 +895,6 @@ dependencies = [
|
||||
"ena",
|
||||
"failure",
|
||||
"itertools",
|
||||
"peg",
|
||||
"pretty_assertions",
|
||||
"radix_trie",
|
||||
"schala-lang-codegen",
|
||||
|
@ -34,6 +34,7 @@ impl Fold for RecursiveDescentFn {
|
||||
}
|
||||
|
||||
result.map_err(|mut parse_error: ParseError| {
|
||||
parse_error.production_name = Some(stringify!(#ident).to_string());
|
||||
parse_error
|
||||
})
|
||||
}
|
||||
|
@ -14,9 +14,6 @@ derivative = "1.0.3"
|
||||
colored = "1.8"
|
||||
radix_trie = "0.1.5"
|
||||
assert_matches = "1.5"
|
||||
#peg = "0.7.0"
|
||||
peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" }
|
||||
|
||||
|
||||
schala-lang-codegen = { path = "../codegen" }
|
||||
schala-repl = { path = "../../schala-repl" }
|
||||
|
@ -14,8 +14,8 @@ fn getline(arg) { }
|
||||
|
||||
fn map(input: Option<T>, func: Func): Option<T> {
|
||||
if input {
|
||||
is Option::Some(x) then Option::Some(func(x))
|
||||
is Option::None then Option::None
|
||||
is Option::Some(x) then Option::Some(func(x)),
|
||||
is Option::None then Option::None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,7 @@ pub use visitor::*;
|
||||
use crate::{
|
||||
derivative::Derivative,
|
||||
identifier::{define_id_kind, Id},
|
||||
parsing::Location,
|
||||
tokenizing::Location,
|
||||
};
|
||||
|
||||
define_id_kind!(ASTItem);
|
||||
@ -197,7 +197,6 @@ pub struct TypeSingletonName {
|
||||
pub enum ExpressionKind {
|
||||
NatLiteral(u64),
|
||||
FloatLiteral(f64),
|
||||
//TODO StringLiteral variant needs to support prefixes
|
||||
StringLiteral(Rc<String>),
|
||||
BoolLiteral(bool),
|
||||
BinExp(BinOp, Box<Expression>, Box<Expression>),
|
||||
@ -240,7 +239,7 @@ pub struct ConditionArm {
|
||||
pub enum Condition {
|
||||
Pattern(Pattern),
|
||||
TruncatedOp(BinOp, Expression),
|
||||
//Expression(Expression), //I'm pretty sure I don't actually want this
|
||||
Expression(Expression),
|
||||
Else,
|
||||
}
|
||||
|
||||
@ -263,7 +262,7 @@ pub enum PatternLiteral {
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Enumerator {
|
||||
pub id: Rc<String>, //TODO rename this field
|
||||
pub id: Rc<String>,
|
||||
pub generator: Expression,
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
use std::rc::Rc;
|
||||
|
||||
use crate::tokenizing::TokenKind;
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct PrefixOp {
|
||||
sigil: Rc<String>,
|
||||
@ -13,6 +15,10 @@ impl PrefixOp {
|
||||
pub fn sigil(&self) -> &str {
|
||||
&self.sigil
|
||||
}
|
||||
|
||||
/// Reports whether `op` is one of the sigils accepted as a prefix operator:
/// `+`, `-` or `!`.
pub fn is_prefix(op: &str) -> bool {
    ["+", "-", "!"].contains(&op)
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
@ -29,14 +35,34 @@ impl BinOp {
|
||||
&self.sigil
|
||||
}
|
||||
|
||||
pub fn from_sigil_token(tok: &TokenKind) -> Option<BinOp> {
|
||||
let s = token_kind_to_sigil(tok)?;
|
||||
Some(BinOp::from_sigil(s))
|
||||
}
|
||||
|
||||
/// The lowest possible precedence value; used as the starting floor when
/// resolving a sequence of binary operators.
pub fn min_precedence() -> i32 {
    // `i32::MIN` is the associated constant that supersedes the legacy
    // `i32::min_value()` function; the value is identical.
    i32::MIN
}
|
||||
pub fn get_precedence(&self) -> i32 {
|
||||
binop_precedences(self.sigil.as_ref())
|
||||
pub fn get_precedence_from_token(op_tok: &TokenKind) -> Option<i32> {
|
||||
let s = token_kind_to_sigil(op_tok)?;
|
||||
Some(binop_precedences(s))
|
||||
}
|
||||
}
|
||||
|
||||
fn token_kind_to_sigil(tok: &TokenKind) -> Option<&str> {
|
||||
use self::TokenKind::*;
|
||||
Some(match tok {
|
||||
Operator(op) => op.as_str(),
|
||||
Period => ".",
|
||||
Pipe => "|",
|
||||
Slash => "/",
|
||||
LAngleBracket => "<",
|
||||
RAngleBracket => ">",
|
||||
Equals => "=",
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
fn binop_precedences(s: &str) -> i32 {
|
||||
let default = 10_000_000;
|
||||
match s {
|
||||
|
@ -166,6 +166,9 @@ pub fn walk_if_expr_body<V: ASTVisitor>(v: &mut V, body: &IfExpressionBody) {
|
||||
Condition::TruncatedOp(ref _binop, ref expr) => {
|
||||
walk_expression(v, expr);
|
||||
}
|
||||
Condition::Expression(ref expr) => {
|
||||
walk_expression(v, expr);
|
||||
}
|
||||
Condition::Else => (),
|
||||
}
|
||||
if let Some(ref guard) = arm.guard {
|
||||
|
@ -1,7 +1,8 @@
|
||||
use crate::{
|
||||
parsing::{Location, ParseError},
|
||||
parsing::ParseError,
|
||||
schala::{SourceReference, Stage},
|
||||
symbol_table::SymbolError,
|
||||
tokenizing::{Location, Token, TokenKind},
|
||||
type_inference::TypeError,
|
||||
};
|
||||
|
||||
@ -51,6 +52,26 @@ impl SchalaError {
|
||||
errors: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn from_tokens(tokens: &[Token]) -> Option<SchalaError> {
|
||||
let token_errors: Vec<Error> = tokens
|
||||
.iter()
|
||||
.filter_map(|tok| match tok.kind {
|
||||
TokenKind::Error(ref err) => Some(Error {
|
||||
location: Some(tok.location),
|
||||
text: Some(err.clone()),
|
||||
stage: Stage::Tokenizing,
|
||||
}),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if token_errors.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(SchalaError { errors: token_errors, formatted_parse_error: None })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
@ -61,18 +82,22 @@ struct Error {
|
||||
}
|
||||
|
||||
fn format_parse_error(error: ParseError, source_reference: &SourceReference) -> String {
|
||||
let offset = error.location.offset;
|
||||
let (line_start, line_num, line_from_program) = source_reference.get_line(offset);
|
||||
let ch = offset - line_start;
|
||||
|
||||
let location_pointer = format!("{}^", " ".repeat(ch));
|
||||
let line_num = error.token.location.line_num;
|
||||
let ch = error.token.location.char_num;
|
||||
let line_from_program = source_reference.get_line(line_num as usize);
|
||||
let location_pointer = format!("{}^", " ".repeat(ch.into()));
|
||||
|
||||
let line_num_digits = format!("{}", line_num).chars().count();
|
||||
let space_padding = " ".repeat(line_num_digits);
|
||||
|
||||
let production = match error.production_name {
|
||||
Some(n) => format!("\n(from production \"{}\")", n),
|
||||
None => "".to_string(),
|
||||
};
|
||||
|
||||
format!(
|
||||
r#"
|
||||
{error_msg}
|
||||
{error_msg}{production}
|
||||
{space_padding} |
|
||||
{line_num} | {}
|
||||
{space_padding} | {}
|
||||
@ -82,5 +107,6 @@ fn format_parse_error(error: ParseError, source_reference: &SourceReference) ->
|
||||
error_msg = error.msg,
|
||||
space_padding = space_padding,
|
||||
line_num = line_num,
|
||||
production = production
|
||||
)
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
//! `ProgrammingLanguageInterface` and the chain of compiler passes for it.
|
||||
|
||||
extern crate schala_repl;
|
||||
#[macro_use]
|
||||
extern crate schala_lang_codegen;
|
||||
extern crate derivative;
|
||||
|
||||
@ -18,6 +19,7 @@ mod type_inference;
|
||||
|
||||
mod ast;
|
||||
mod parsing;
|
||||
mod tokenizing;
|
||||
#[macro_use]
|
||||
mod symbol_table;
|
||||
mod builtin;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,592 +0,0 @@
|
||||
use std::rc::Rc;
|
||||
|
||||
//TODO make use of the format_parse_error function
|
||||
//use crate::error::{SchalaError, format_parse_error};
|
||||
use crate::{
|
||||
ast::*,
|
||||
identifier::{Id, IdStore},
|
||||
parsing::ParseError,
|
||||
};
|
||||
|
||||
/// Copies `s` into a freshly allocated, reference-counted `String`.
fn rc_string(s: &str) -> Rc<String> {
    let owned = String::from(s);
    Rc::new(owned)
}
|
||||
|
||||
pub struct Parser {
|
||||
id_store: IdStore<ASTItem>,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self { id_store: IdStore::new() }
|
||||
}
|
||||
|
||||
pub(crate) fn parse(&mut self, input: &str) -> Result<AST, ParseError> {
|
||||
use peg::str::LineCol;
|
||||
|
||||
schala_parser::program(input, self).map_err(|err: peg::error::ParseError<LineCol>| {
|
||||
let msg = err.to_string();
|
||||
ParseError {
|
||||
msg,
|
||||
location: err.location.offset.into(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn fresh(&mut self) -> Id<ASTItem> {
|
||||
self.id_store.fresh()
|
||||
}
|
||||
}
|
||||
|
||||
enum ExtendedPart<'a> {
|
||||
Index(Vec<Expression>),
|
||||
Accessor(&'a str),
|
||||
Call(Vec<InvocationArgument>),
|
||||
}
|
||||
|
||||
peg::parser! {
|
||||
pub grammar schala_parser() for str {
|
||||
|
||||
rule whitespace() = [' ' | '\t' ]
|
||||
rule whitespace_or_newline() = [' ' | '\t' | '\n' ]
|
||||
|
||||
rule _ = quiet!{ (block_comment() / line_comment() / whitespace())* }
|
||||
|
||||
rule __ = quiet!{ (block_comment() / line_comment() / whitespace_or_newline())* }
|
||||
|
||||
rule block_comment() = "/*" (block_comment() / !"*/" [_])* "*/"
|
||||
// A `//` comment runs up to, but does not consume, the newline that ends it.
// End of input (`![_]`) also terminates the comment, so a comment on the last
// line of a file with no trailing newline is still accepted.
rule line_comment() = "//" (!['\n'] [_])* &("\n" / ![_])
|
||||
|
||||
|
||||
pub rule program(parser: &mut Parser) -> AST =
|
||||
__ statements:(statement(parser) ** (delimiter()+) ) __ { AST { id: parser.fresh(), statements: statements.into() } }
|
||||
|
||||
rule delimiter() = (";" / "\n")+
|
||||
|
||||
//Note - this is a hack, ideally the rule `rule block() -> Block = "{" _ items:(statement() **
|
||||
//delimiter()) _ "}" { items.into() }` would've worked, but it doesn't.
|
||||
pub rule block(parser: &mut Parser) -> Block =
|
||||
"{" __ items:block_item(parser)* __ "}" { items.into() } /
|
||||
"{" __ stmt:statement(parser) __ "}" { vec![stmt].into() }
|
||||
|
||||
rule block_item(parser: &mut Parser) -> Statement =
|
||||
_ stmt:statement(parser) _ delimiter()+ { stmt }
|
||||
|
||||
rule statement(parser: &mut Parser) -> Statement =
|
||||
_ pos:position!() kind:statement_kind(parser) _ { Statement { id: parser.fresh(), location: pos.into(), kind } }
|
||||
|
||||
rule statement_kind(parser: &mut Parser) -> StatementKind =
|
||||
__ import:import(parser) { StatementKind::Import(import) } /
|
||||
__ decl:declaration(parser) { StatementKind::Declaration(decl) } /
|
||||
__ flow:flow(parser) { StatementKind::Flow(flow) } /
|
||||
__ expr:expression(parser) { StatementKind::Expression(expr) }
|
||||
|
||||
rule flow(parser: &mut Parser) -> FlowControl =
|
||||
"continue" { FlowControl::Continue } /
|
||||
"break" { FlowControl::Break } /
|
||||
"return" _ expr:expression(parser)? { FlowControl::Return(expr) }
|
||||
|
||||
rule import(parser: &mut Parser) -> ImportSpecifier =
|
||||
"import" _ path_components:path_components() suffix:import_suffix()? {
|
||||
ImportSpecifier {
|
||||
id: parser.fresh(),
|
||||
path_components,
|
||||
imported_names: suffix.unwrap_or_else(|| ImportedNames::LastOfPath)
|
||||
}
|
||||
}
|
||||
|
||||
rule path_components() -> Vec<Rc<String>> =
|
||||
"::"? name:identifier() rest:path_component()* {
|
||||
let mut items = vec![rc_string(name)];
|
||||
items.extend(rest.into_iter().map(|n| rc_string(n)));
|
||||
items
|
||||
}
|
||||
|
||||
rule path_component() -> &'input str = "::" ident:identifier() { ident }
|
||||
|
||||
rule import_suffix() -> ImportedNames =
|
||||
"::*" { ImportedNames::All } /
|
||||
"::{" __ names:(identifier() ** (_ "," _)) __ "}" { ImportedNames::List(names.into_iter().map(rc_string).collect()) }
|
||||
|
||||
|
||||
rule declaration(parser: &mut Parser) -> Declaration =
|
||||
binding(parser) / type_decl(parser) / annotation(parser) / func(parser) / interface(parser) /
|
||||
implementation(parser) / module(parser)
|
||||
|
||||
rule module(parser: &mut Parser) -> Declaration =
|
||||
"module" _ name:identifier() _ items:block(parser) { Declaration::Module { name: rc_string(name), items } }
|
||||
|
||||
rule implementation(parser: &mut Parser) -> Declaration =
|
||||
"impl" _ interface:type_singleton_name() _ "for" _ type_name:type_identifier() _ block:decl_block(parser) {
|
||||
Declaration::Impl { type_name, interface_name: Some(interface), block }
|
||||
|
||||
} /
|
||||
"impl" _ type_name:type_identifier() _ block:decl_block(parser) {
|
||||
Declaration::Impl { type_name, interface_name: None, block }
|
||||
}
|
||||
|
||||
rule decl_block(parser: &mut Parser) -> Vec<Declaration> =
|
||||
"{" __ decls:(func_declaration(parser) ** (delimiter()+)) __ "}" { decls }
|
||||
|
||||
rule interface(parser: &mut Parser) -> Declaration =
|
||||
"interface" _ name:identifier() _ signatures:signature_block(parser) { Declaration::Interface { name: rc_string(name), signatures } }
|
||||
|
||||
rule signature_block(parser: &mut Parser) -> Vec<Signature> =
|
||||
"{" __ signatures:(func_signature(parser) ** (delimiter()+)) __ "}" { signatures }
|
||||
|
||||
rule func(parser: &mut Parser) -> Declaration =
|
||||
decl:func_declaration(parser) { decl } /
|
||||
sig:func_signature(parser) { Declaration::FuncSig(sig) }
|
||||
|
||||
rule func_declaration(parser: &mut Parser) -> Declaration =
|
||||
_ sig:func_signature(parser) __ body:block(parser) { Declaration::FuncDecl(sig, body) }
|
||||
|
||||
//TODO handle operators
|
||||
rule func_signature(parser: &mut Parser) -> Signature =
|
||||
_ "fn" _ name:identifier() "(" _ params:formal_params(parser) _ ")" _ type_anno:type_anno()? { Signature {
|
||||
name: rc_string(name), operator: false, params, type_anno
|
||||
} }
|
||||
|
||||
rule formal_params(parser: &mut Parser) -> Vec<FormalParam> =
|
||||
params:(formal_param(parser) ** (_ "," _)) {? if params.len() < 256 { Ok(params) } else {
|
||||
Err("function-too-long") }
|
||||
}
|
||||
|
||||
rule formal_param(parser: &mut Parser) -> FormalParam =
|
||||
name:identifier() _ anno:type_anno()? _ "=" expr:expression(parser) { FormalParam { name: rc_string(name),
|
||||
default: Some(expr), anno } } /
|
||||
name:identifier() _ anno:type_anno()? { FormalParam { name: rc_string(name), default: None, anno } }
|
||||
|
||||
|
||||
rule annotation(parser: &mut Parser) -> Declaration =
|
||||
"@" name:identifier() args:annotation_args(parser)? delimiter()+ _ inner:statement(parser) { Declaration::Annotation {
|
||||
name: rc_string(name), arguments: if let Some(args) = args { args } else { vec![] }, inner: Box::new(inner) }
|
||||
}
|
||||
|
||||
rule annotation_args(parser: &mut Parser) -> Vec<Expression> =
|
||||
"(" _ args:(expression(parser) ** (_ "," _)) _ ")" { args }
|
||||
|
||||
|
||||
rule binding(parser: &mut Parser) -> Declaration =
|
||||
"let" _ mutable:"mut"? _ ident:identifier() _ type_anno:type_anno()? _ "=" _ expr:expression(parser) {
|
||||
Declaration::Binding { name: Rc::new(ident.to_string()), constant: mutable.is_none(),
|
||||
type_anno, expr }
|
||||
}
|
||||
|
||||
|
||||
rule type_decl(parser: &mut Parser) -> Declaration =
|
||||
"type" _ "alias" _ alias:type_alias() { alias } /
|
||||
"type" _ mutable:"mut"? _ name:type_singleton_name() _ "=" _ body:type_body(parser) {
|
||||
Declaration::TypeDecl { name, body, mutable: mutable.is_some() }
|
||||
}
|
||||
|
||||
rule type_singleton_name() -> TypeSingletonName =
|
||||
name:identifier() params:type_params()? { TypeSingletonName {
|
||||
name: rc_string(name), params: if let Some(params) = params { params } else { vec![] }
|
||||
} }
|
||||
|
||||
rule type_params() -> Vec<TypeIdentifier> =
|
||||
"<" _ idents:(type_identifier() ** (_ "," _)) _ ">" { idents }
|
||||
|
||||
rule type_identifier() -> TypeIdentifier =
|
||||
"(" _ items:(type_identifier() ** (_ "," _)) _ ")" { TypeIdentifier::Tuple(items) } /
|
||||
singleton:type_singleton_name() { TypeIdentifier::Singleton(singleton) }
|
||||
|
||||
rule type_body(parser: &mut Parser) -> TypeBody =
|
||||
"{" _ items:(record_variant_item() ** (__ "," __)) __ "}" { TypeBody::ImmediateRecord(parser.fresh(), items) } /
|
||||
variants:(variant_spec(parser) ** (__ "|" __)) { TypeBody::Variants(variants) }
|
||||
|
||||
rule variant_spec(parser: &mut Parser) -> Variant =
|
||||
name:identifier() __ "{" __ typed_identifier_list:(record_variant_item() ** (__ "," __)) __ ","? __ "}" { Variant {
|
||||
id: parser.fresh(), name: rc_string(name), kind: VariantKind::Record(typed_identifier_list)
|
||||
} } /
|
||||
name:identifier() "(" tuple_members:(type_identifier() ++ (__ "," __)) ")" { Variant {
|
||||
id: parser.fresh(), name: rc_string(name), kind: VariantKind::TupleStruct(tuple_members) } } /
|
||||
name:identifier() { Variant { id: parser.fresh(), name: rc_string(name), kind: VariantKind::UnitStruct } }
|
||||
|
||||
rule record_variant_item() -> (Rc<String>, TypeIdentifier) =
|
||||
name:identifier() _ ":" _ ty:type_identifier() { (rc_string(name), ty) }
|
||||
|
||||
rule type_alias() -> Declaration =
|
||||
alias:identifier() _ "=" _ name:identifier() { Declaration::TypeAlias { alias: rc_string(alias), original: rc_string(name), } }
|
||||
|
||||
rule type_anno() -> TypeIdentifier =
|
||||
":" _ identifier:type_identifier() { identifier }
|
||||
|
||||
pub rule expression(parser: &mut Parser) -> Expression =
|
||||
__ kind:expression_kind(true, parser) _ type_anno:type_anno()? { Expression { id: parser.fresh(), type_anno, kind } }
|
||||
|
||||
rule expression_no_struct(parser: &mut Parser) -> Expression =
|
||||
__ kind:expression_kind(false, parser) { Expression { id: parser.fresh(), type_anno: None, kind: kind } }
|
||||
|
||||
rule expression_kind(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
||||
precedence_expr(struct_ok, parser)
|
||||
|
||||
rule precedence_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
||||
first:prefix_expr(struct_ok, parser) _ next:(precedence_continuation(struct_ok, parser))* {
|
||||
let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect();
|
||||
BinopSequence { first, next }.do_precedence(parser)
|
||||
}
|
||||
|
||||
rule precedence_continuation(struct_ok: bool, parser: &mut Parser) -> (&'input str, ExpressionKind) =
|
||||
op:operator() _ expr:prefix_expr(struct_ok, parser) _ { (op, expr) }
|
||||
|
||||
rule prefix_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
||||
prefix:prefix()? expr:extended_expr(struct_ok, parser) {
|
||||
if let Some(p) = prefix {
|
||||
let expr = Expression::new(parser.fresh(), expr);
|
||||
let prefix = PrefixOp::from_sigil(p);
|
||||
ExpressionKind::PrefixExp(prefix, Box::new(expr))
|
||||
} else {
|
||||
expr
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
rule prefix() -> &'input str =
|
||||
$(['+' | '-' | '!' ])
|
||||
|
||||
//TODO make the definition of operators more complex
|
||||
rule operator() -> &'input str =
|
||||
quiet!{!"*/" s:$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ ) { s } } /
|
||||
expected!("operator")
|
||||
|
||||
rule extended_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
||||
primary:primary(struct_ok, parser) parts:(extended_expr_part(parser)*) {
|
||||
let mut expression = Expression::new(parser.fresh(), primary);
|
||||
for part in parts.into_iter() {
|
||||
let kind = match part {
|
||||
ExtendedPart::Index(indexers) => {
|
||||
ExpressionKind::Index { indexee: Box::new(expression), indexers }
|
||||
},
|
||||
ExtendedPart::Accessor(name) => {
|
||||
let name = rc_string(name);
|
||||
ExpressionKind::Access { name, expr: Box::new(expression) }
|
||||
},
|
||||
ExtendedPart::Call(arguments) => {
|
||||
ExpressionKind::Call { f: Box::new(expression), arguments }
|
||||
}
|
||||
};
|
||||
|
||||
expression = Expression::new(parser.fresh(), kind);
|
||||
}
|
||||
|
||||
expression.kind
|
||||
}
|
||||
|
||||
rule extended_expr_part(parser: &mut Parser) -> ExtendedPart<'input> =
|
||||
indexers:index_part(parser) { ExtendedPart::Index(indexers) } /
|
||||
arguments:call_part(parser) { ExtendedPart::Call(arguments) } /
|
||||
"." name:identifier() { ExtendedPart::Accessor(name) }
|
||||
|
||||
rule index_part(parser: &mut Parser) -> Vec<Expression> =
|
||||
"[" indexers:(expression(parser) ++ ",") "]" { indexers }
|
||||
|
||||
rule call_part(parser: &mut Parser) -> Vec<InvocationArgument> =
|
||||
"(" arguments:(invocation_argument(parser) ** ",") ")" { arguments }
|
||||
|
||||
//TODO this shouldn't be an expression b/c type annotations disallowed here
|
||||
rule invocation_argument(parser: &mut Parser) -> InvocationArgument =
|
||||
_ "_" _ { InvocationArgument::Ignored } /
|
||||
_ ident:identifier() _ "=" _ expr:expression(parser) { InvocationArgument::Keyword {
|
||||
name: Rc::new(ident.to_string()),
|
||||
expr
|
||||
} } /
|
||||
_ expr:expression(parser) _ { InvocationArgument::Positional(expr) }
|
||||
|
||||
|
||||
rule primary(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
|
||||
while_expr(parser) / for_expr(parser) / float_literal() / nat_literal() / bool_literal() /
|
||||
string_literal() / paren_expr(parser) /
|
||||
list_expr(parser) / if_expr(parser) / lambda_expr(parser) /
|
||||
item:named_struct(parser) {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } /
|
||||
identifier_expr(parser)
|
||||
|
||||
rule lambda_expr(parser: &mut Parser) -> ExpressionKind =
|
||||
r#"\"# __ "(" _ params:formal_params(parser) _ ")" _ type_anno:(type_anno()?) _ body:block(parser) {
|
||||
ExpressionKind::Lambda { params, type_anno, body }
|
||||
} /
|
||||
r#"\"# param:formal_param(parser) _ type_anno:(type_anno()?) _ body:block(parser) {
|
||||
ExpressionKind::Lambda { params: vec![param], type_anno, body }
|
||||
}
|
||||
|
||||
rule for_expr(parser: &mut Parser) -> ExpressionKind =
|
||||
"for" _ enumerators:for_enumerators(parser) _ body:for_body(parser) {
|
||||
ExpressionKind::ForExpression { enumerators, body }
|
||||
}
|
||||
|
||||
rule for_enumerators(parser: &mut Parser) -> Vec<Enumerator> =
|
||||
"{" _ enumerators:(enumerator(parser) ++ ",") _ "}" { enumerators } /
|
||||
enumerator:enumerator(parser) { vec![enumerator] }
|
||||
|
||||
//TODO add guards, etc.
|
||||
rule enumerator(parser: &mut Parser) -> Enumerator =
|
||||
ident:identifier() _ "<-" _ generator:expression_no_struct(parser) {
|
||||
Enumerator { id: Rc::new(ident.to_string()), generator }
|
||||
} /
|
||||
//TODO need to distinguish these two cases in AST
|
||||
ident:identifier() _ "=" _ generator:expression_no_struct(parser) {
|
||||
Enumerator { id: Rc::new(ident.to_string()), generator }
|
||||
}
|
||||
|
||||
rule for_body(parser: &mut Parser) -> Box<ForBody> =
|
||||
"return" _ expr:expression(parser) { Box::new(ForBody::MonadicReturn(expr)) } /
|
||||
body:block(parser) { Box::new(ForBody::StatementBlock(body)) }
|
||||
|
||||
rule while_expr(parser: &mut Parser) -> ExpressionKind =
|
||||
"while" _ cond:expression_kind(false, parser)? _ body:block(parser) {
|
||||
ExpressionKind::WhileExpression {
|
||||
condition: cond.map(|kind| Box::new(Expression::new(parser.fresh(), kind))),
|
||||
body,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
rule identifier_expr(parser: &mut Parser) -> ExpressionKind =
|
||||
qn:qualified_identifier(parser) { ExpressionKind::Value(qn) }
|
||||
|
||||
rule named_struct(parser: &mut Parser) -> ExpressionKind =
|
||||
name:qualified_identifier(parser) _ fields:record_block(parser) {
|
||||
ExpressionKind::NamedStruct {
|
||||
name,
|
||||
fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//TODO anonymous structs, update syntax for structs
|
||||
rule record_block(parser: &mut Parser) -> Vec<(&'input str, Expression)> =
|
||||
"{" _ entries:(record_entry(parser) ** ",") _ "}" { entries }
|
||||
|
||||
rule record_entry(parser: &mut Parser) -> (&'input str, Expression) =
|
||||
_ name:identifier() _ ":" _ expr:expression(parser) _ { (name, expr) }
|
||||
|
||||
rule qualified_identifier(parser: &mut Parser) -> QualifiedName =
|
||||
names:(identifier() ++ "::") { QualifiedName { id: parser.fresh(), components: names.into_iter().map(|name| Rc::new(name.to_string())).collect() } }
|
||||
|
||||
//TODO improve the definition of identifiers
|
||||
rule identifier() -> &'input str =
|
||||
!(reserved() !(ident_continuation())) text:$(['a'..='z' | 'A'..='Z' | '_'] ident_continuation()*) { text }
|
||||
|
||||
rule ident_continuation() -> &'input str =
|
||||
text:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_'])
|
||||
|
||||
rule reserved() = "if" / "then" / "else" / "is" / "fn" / "for" / "while" / "let" / "in" / "mut" / "return" /
|
||||
"break" / "alias" / "type" / "self" / "Self" / "interface" / "impl" / "true" / "false" / "module" / "import"
|
||||
|
||||
|
||||
rule if_expr(parser: &mut Parser) -> ExpressionKind =
|
||||
"if" _ discriminator:(expression(parser)?) _ body:if_expr_body(parser) {
|
||||
ExpressionKind::IfExpression {
|
||||
discriminator: discriminator.map(Box::new),
|
||||
body: Box::new(body),
|
||||
}
|
||||
}
|
||||
|
||||
rule if_expr_body(parser: &mut Parser) -> IfExpressionBody =
|
||||
cond_block(parser) / simple_pattern_match(parser) / simple_conditional(parser)
|
||||
|
||||
rule simple_conditional(parser: &mut Parser) -> IfExpressionBody =
|
||||
"then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) {
|
||||
IfExpressionBody::SimpleConditional { then_case, else_case }
|
||||
}
|
||||
|
||||
rule simple_pattern_match(parser: &mut Parser) -> IfExpressionBody =
|
||||
"is" _ pattern:pattern(parser) _ "then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) {
|
||||
IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }
|
||||
}
|
||||
|
||||
rule cond_block(parser: &mut Parser) -> IfExpressionBody =
|
||||
"{" __ cond_arms:(cond_arm(parser) ++ (delimiter()+)) __ "}" { IfExpressionBody::CondList(cond_arms) }
|
||||
|
||||
rule cond_arm(parser: &mut Parser) -> ConditionArm =
|
||||
_ "else" _ body:expr_or_block(parser) { ConditionArm { condition: Condition::Else, guard: None, body } } /
|
||||
_ condition:condition(parser) _ guard:condition_guard(parser) _ "then" _ body:expr_or_block(parser)
|
||||
{ ConditionArm { condition, guard, body } }
|
||||
|
||||
rule condition(parser: &mut Parser) -> Condition =
|
||||
"is" _ pat:pattern(parser) { Condition::Pattern(pat) } /
|
||||
op:operator() _ expr:expression(parser) { Condition::TruncatedOp(BinOp::from_sigil(op), expr) }
|
||||
|
||||
rule condition_guard(parser: &mut Parser) -> Option<Expression> =
|
||||
("if" _ expr:expression(parser) { expr } )?
|
||||
|
||||
rule expr_or_block(parser: &mut Parser) -> Block = block(parser) / pos:position!() ex:expression(parser) {
|
||||
Statement {
|
||||
id: parser.fresh() , location: pos.into(),
|
||||
kind: StatementKind::Expression(ex)
|
||||
}.into()
|
||||
}
|
||||
|
||||
rule else_case(parser: &mut Parser) -> Option<Block> =
|
||||
("else" _ eorb:expr_or_block(parser) { eorb })?
|
||||
|
||||
rule pattern(parser: &mut Parser) -> Pattern =
|
||||
"(" _ variants:(pattern(parser) ++ ",") _ ")" { Pattern::TuplePattern(variants) } /
|
||||
_ pat:simple_pattern(parser) { pat }
|
||||
|
||||
rule simple_pattern(parser: &mut Parser) -> Pattern =
|
||||
pattern_literal() /
|
||||
qn:qualified_identifier(parser) "(" members:(pattern(parser) ** ",") ")" {
|
||||
Pattern::TupleStruct(qn, members)
|
||||
} /
|
||||
qn:qualified_identifier(parser) _ "{" _ items:(record_pattern_entry(parser) ** ",") "}" _ {
|
||||
let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect();
|
||||
Pattern::Record(qn, items)
|
||||
} /
|
||||
qn:qualified_identifier(parser) { Pattern::VarOrName(qn) }
|
||||
|
||||
rule record_pattern_entry(parser: &mut Parser) -> (&'input str, Pattern) =
|
||||
_ name:identifier() _ ":" _ pat:pattern(parser) _ { (name, pat) } /
|
||||
_ name:identifier() _ {
|
||||
let qn = QualifiedName {
|
||||
id: parser.fresh(),
|
||||
components: vec![Rc::new(name.to_string())],
|
||||
};
|
||||
(name, Pattern::VarOrName(qn))
|
||||
}
|
||||
|
||||
|
||||
rule pattern_literal() -> Pattern =
|
||||
"true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } /
|
||||
"false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } /
|
||||
s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string()))) } /
|
||||
sign:("-"?) num:(float_literal() / nat_literal()) {
|
||||
let neg = sign.is_some();
|
||||
Pattern::Literal(PatternLiteral::NumPattern { neg, num })
|
||||
} /
|
||||
"_" { Pattern::Ignored }
|
||||
|
||||
|
||||
rule list_expr(parser: &mut Parser) -> ExpressionKind =
|
||||
"[" exprs:(expression(parser) ** ",") "]" {
|
||||
let mut exprs = exprs;
|
||||
ExpressionKind::ListLiteral(exprs)
|
||||
}
|
||||
|
||||
rule paren_expr(parser: &mut Parser) -> ExpressionKind =
|
||||
"(" exprs:(expression(parser) ** ",") ")" {
|
||||
let mut exprs = exprs;
|
||||
match exprs.len() {
|
||||
1 => exprs.pop().unwrap().kind,
|
||||
_ => ExpressionKind::TupleLiteral(exprs),
|
||||
}
|
||||
}
|
||||
|
||||
//TODO need to do something with prefix in the AST
|
||||
rule string_literal() -> ExpressionKind =
|
||||
prefix:identifier()? s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) }
|
||||
|
||||
rule bare_string_literal() -> &'input str =
|
||||
"\"" s:$(string_component()*) "\"" { s }
|
||||
|
||||
rule string_component() -> &'input str =
|
||||
r#"\\"# { "\\" } /
|
||||
r#"\""# { "\"" } /
|
||||
r#"\t"# { "\t" } /
|
||||
r#"\n"# { "\n" } /
|
||||
ch:$([^ '"' ]) { ch }
|
||||
|
||||
rule bool_literal() -> ExpressionKind =
|
||||
"true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) }
|
||||
|
||||
rule nat_literal() -> ExpressionKind =
|
||||
bin_literal() / hex_literal() / unmarked_literal()
|
||||
|
||||
// Plain decimal natural number. `digits()` admits `_` separators, which
// `str::parse` rejects, so they are stripped first. A value that does not fit
// in a u64 is reported as a parse failure instead of panicking.
rule unmarked_literal() -> ExpressionKind =
    digits:digits() {?
        digits
            .replace('_', "")
            .parse::<u64>()
            .map(ExpressionKind::NatLiteral)
            .map_err(|_| "natural number literal that fits in 64 bits")
    }
|
||||
|
||||
rule bin_literal() -> ExpressionKind =
|
||||
"0b" digits:bin_digits() {? parse_binary(digits).map(ExpressionKind::NatLiteral) }
|
||||
|
||||
rule hex_literal() -> ExpressionKind =
|
||||
"0x" digits:hex_digits() {? parse_hex(digits).map(ExpressionKind::NatLiteral) }
|
||||
|
||||
// Floating point literal: `1.`, `1.5` or `.5`. `digits()` admits `_`
// separators, which `str::parse` rejects, so they are stripped before
// conversion; a conversion failure becomes a parse failure, not a panic.
rule float_literal() -> ExpressionKind =
    ds:$( digits() "." digits()? / "." digits() ) {?
        ds.replace('_', "")
            .parse::<f64>()
            .map(ExpressionKind::FloatLiteral)
            .map_err(|_| "valid floating point literal")
    }
|
||||
|
||||
rule digits() -> &'input str = $((digit_group() "_"*)+)
|
||||
rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+)
|
||||
rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+)
|
||||
|
||||
rule digit_group() -> &'input str = $(['0'..='9']+)
|
||||
rule bin_digit_group() -> &'input str = $(['0' | '1']+)
|
||||
rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the digit portion of a binary literal (the text after `0b`) to a
/// `u64`.
///
/// `_` separators are ignored. An empty string yields `Ok(0)`.
///
/// # Errors
///
/// * `"Binary expression will overflow"` when the value does not fit in a
///   `u64`. Leading zeros never cause an overflow by themselves, and a full
///   64-digit literal is accepted.
/// * `"Internal parser error: invalid binary digit"` when a character other
///   than `0`, `1` or `_` is encountered.
fn parse_binary(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    // Accumulate most-significant digit first; overflow is detected only when
    // the accumulated value itself no longer fits, not one digit early.
    for d in digits.chars() {
        let bit: u64 = match d {
            '0' => 0,
            '1' => 1,
            '_' => continue,
            _ => return Err("Internal parser error: invalid binary digit"),
        };
        result = result
            .checked_mul(2)
            .and_then(|shifted| shifted.checked_add(bit))
            .ok_or("Binary expression will overflow")?;
    }
    Ok(result)
}
|
||||
|
||||
/// Converts the digits of a hexadecimal literal (without the `0x` prefix) into a `u64`.
///
/// `_` separators are ignored, both upper- and lower-case digits are accepted, and an
/// empty digit string yields `0`.
///
/// # Errors
///
/// Returns `Err` when the value does not fit in 64 bits, or when a character that is
/// neither a hex digit nor `_` is encountered.
fn parse_hex(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    // Accumulate most-significant digit first, so overflow is only reported when the
    // value itself exceeds `u64::MAX`, not merely because 16 digits were seen.
    for d in digits.chars().filter(|&d| d != '_') {
        let n = d.to_digit(16).ok_or("Internal parser error: invalid hex digit")?;
        result = result
            .checked_mul(16)
            .and_then(|shifted| shifted.checked_add(u64::from(n)))
            .ok_or("Hexadecimal expression will overflow")?;
    }
    Ok(result)
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BinopSequence {
|
||||
first: ExpressionKind,
|
||||
next: Vec<(BinOp, ExpressionKind)>,
|
||||
}
|
||||
|
||||
impl BinopSequence {
|
||||
fn do_precedence(self, parser: &mut Parser) -> ExpressionKind {
|
||||
fn helper(
|
||||
precedence: i32,
|
||||
lhs: ExpressionKind,
|
||||
rest: &mut Vec<(BinOp, ExpressionKind)>,
|
||||
parser: &mut Parser,
|
||||
) -> Expression {
|
||||
let mut lhs = Expression::new(parser.fresh(), lhs);
|
||||
loop {
|
||||
let (next_op, next_rhs) = match rest.pop() {
|
||||
Some((a, b)) => (a, b),
|
||||
None => break,
|
||||
};
|
||||
let new_precedence = next_op.get_precedence();
|
||||
if precedence >= new_precedence {
|
||||
rest.push((next_op, next_rhs));
|
||||
break;
|
||||
}
|
||||
let rhs = helper(new_precedence, next_rhs, rest, parser);
|
||||
lhs = Expression::new(
|
||||
parser.fresh(),
|
||||
ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)),
|
||||
);
|
||||
}
|
||||
lhs
|
||||
}
|
||||
let mut as_stack = self.next.into_iter().rev().collect();
|
||||
helper(BinOp::min_precedence(), self.first, &mut as_stack, parser).kind
|
||||
}
|
||||
}
|
@ -6,8 +6,8 @@ use std::{fmt::Write, rc::Rc};
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::new::{schala_parser, Parser};
|
||||
use crate::{ast::*, parsing::Location};
|
||||
use super::{tokenize, ParseResult, Parser};
|
||||
use crate::{ast::*, tokenizing::Location};
|
||||
|
||||
fn rc(s: &str) -> Rc<String> {
|
||||
Rc::new(s.to_owned())
|
||||
@ -17,6 +17,18 @@ fn bx<T>(item: T) -> Box<T> {
|
||||
Box::new(item)
|
||||
}
|
||||
|
||||
fn make_parser(input: &str) -> Parser {
|
||||
let tokens: Vec<crate::tokenizing::Token> = tokenize(input);
|
||||
let mut parser = super::Parser::new();
|
||||
parser.add_new_tokens(tokens);
|
||||
parser
|
||||
}
|
||||
|
||||
fn parse(input: &str) -> ParseResult<AST> {
|
||||
let mut parser = make_parser(input);
|
||||
parser.parse()
|
||||
}
|
||||
|
||||
fn stmt(kind: StatementKind) -> Statement {
|
||||
Statement { location: Location::default(), id: ItemId::default(), kind }
|
||||
}
|
||||
@ -87,43 +99,33 @@ fn ty_simple(name: &str) -> TypeIdentifier {
|
||||
|
||||
macro_rules! assert_ast {
|
||||
($input:expr, $statements:expr) => {
|
||||
let mut parser = Parser::new();
|
||||
let ast = schala_parser::program($input, &mut parser);
|
||||
let ast = parse($input).unwrap();
|
||||
let expected = AST { id: Default::default(), statements: $statements.into() };
|
||||
if ast.is_err() {
|
||||
println!("Parse error: {}", ast.unwrap_err());
|
||||
panic!();
|
||||
}
|
||||
assert_eq!(ast.unwrap(), expected);
|
||||
println!("Expected: {}", expected);
|
||||
println!("Actual: {}", ast);
|
||||
assert_eq!(ast, expected);
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! assert_fail {
|
||||
($input:expr, $failure:expr) => {
|
||||
let mut parser = Parser::new();
|
||||
let err = schala_parser::program($input, &mut parser).unwrap_err();
|
||||
assert_eq!(err.to_string(), $failure);
|
||||
let err = parse($input).unwrap_err();
|
||||
assert_eq!(err.msg, $failure);
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! assert_expr {
|
||||
($input:expr, $correct:expr) => {
|
||||
let mut parser = Parser::new();
|
||||
let expr = schala_parser::expression($input, &mut parser);
|
||||
if expr.is_err() {
|
||||
println!("Expression parse error: {}", expr.unwrap_err());
|
||||
panic!();
|
||||
}
|
||||
assert_eq!(expr.unwrap(), $correct);
|
||||
let mut parser = make_parser($input);
|
||||
assert_eq!(parser.expression().unwrap(), $correct);
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! assert_fail_expr {
|
||||
($input:expr, $failure:expr) => {
|
||||
let mut parser = Parser::new();
|
||||
let _err = schala_parser::expression($input, &mut parser).unwrap_err();
|
||||
//TODO make real tests for failures
|
||||
//assert_eq!(err.to_string(), $failure);
|
||||
let mut parser = make_parser($input);
|
||||
let err = parser.expression().unwrap_err();
|
||||
assert_eq!(err.msg, $failure);
|
||||
};
|
||||
}
|
||||
#[test]
|
||||
@ -139,17 +141,7 @@ fn basic_literals() {
|
||||
assert_expr!("0xf_f_", expr(NatLiteral(255)));
|
||||
assert_expr!("false", expr(BoolLiteral(false)));
|
||||
assert_expr!("true", expr(BoolLiteral(true)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn string_literals() {
|
||||
use ExpressionKind::*;
|
||||
|
||||
assert_expr!(r#""""#, expr(StringLiteral(rc(""))));
|
||||
assert_expr!(r#""hello""#, expr(StringLiteral(rc("hello"))));
|
||||
assert_expr!(r#"b"some bytestring""#, expr(StringLiteral(rc("some bytestring"))));
|
||||
//NOTE I'm not 100% sure this case is correct, but I'll deal with it later
|
||||
assert_expr!(r#""Do \n \" escapes work\t""#, expr(StringLiteral(rc(r#"Do \n \" escapes work\t"#))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -158,7 +150,6 @@ fn list_literals() {
|
||||
|
||||
assert_expr!("[]", expr(ListLiteral(vec![])));
|
||||
assert_expr!("[1,2]", expr(ListLiteral(vec![expr(NatLiteral(1)), expr(NatLiteral(2)),])));
|
||||
assert_fail_expr!("[1,,2]", "some failure");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -167,13 +158,17 @@ fn binexps() {
|
||||
use StatementKind::Expression;
|
||||
|
||||
assert_expr!("0xf_f_+1", binop("+", expr(NatLiteral(255)), expr(NatLiteral(1))));
|
||||
assert_ast!(
|
||||
"3; 4; 4.3",
|
||||
vec![
|
||||
stmt(Expression(expr(NatLiteral(3)))),
|
||||
stmt(Expression(expr(NatLiteral(4)))),
|
||||
stmt(Expression(expr(FloatLiteral(4.3)))),
|
||||
]
|
||||
assert_eq!(
|
||||
parse("3; 4; 4.3").unwrap(),
|
||||
AST {
|
||||
id: Default::default(),
|
||||
statements: vec![
|
||||
stmt(Expression(expr(NatLiteral(3)))),
|
||||
stmt(Expression(expr(NatLiteral(4)))),
|
||||
stmt(Expression(expr(FloatLiteral(4.3)))),
|
||||
]
|
||||
.into()
|
||||
}
|
||||
);
|
||||
|
||||
assert_expr!(
|
||||
@ -312,22 +307,12 @@ fn named_struct() {
|
||||
fn index() {
|
||||
use ExpressionKind::*;
|
||||
assert_expr!(
|
||||
"armok[b,c]",
|
||||
"a[b,c]",
|
||||
expr(Index {
|
||||
indexee: bx(expr(Value(qn!(armok)))),
|
||||
indexee: bx(expr(Value(qn!(a)))),
|
||||
indexers: vec![expr(Value(qn!(b))), expr(Value(qn!(c)))]
|
||||
})
|
||||
);
|
||||
assert_expr!(
|
||||
"a[b,c][1]",
|
||||
expr(Index {
|
||||
indexee: bx(expr(Index {
|
||||
indexee: bx(expr(Value(qn!(a)))),
|
||||
indexers: vec![expr(Value(qn!(b))), expr(Value(qn!(c)))]
|
||||
})),
|
||||
indexers: vec![expr(NatLiteral(1))]
|
||||
})
|
||||
);
|
||||
assert_expr!(
|
||||
"perspicacity()[a]",
|
||||
expr(Index {
|
||||
@ -372,7 +357,7 @@ fn for_expression() {
|
||||
);
|
||||
|
||||
assert_expr!(
|
||||
"for n <- someRange { f(n) ; }",
|
||||
"for n <- someRange { f(n); }",
|
||||
expr(ForExpression {
|
||||
enumerators: vec![Enumerator { id: rc("n"), generator: expr(Value(qn!(someRange))) }],
|
||||
body: bx(ForBody::StatementBlock(
|
||||
@ -480,9 +465,8 @@ fn single_param_lambda() {
|
||||
fn complex_lambdas() {
|
||||
use ExpressionKind::*;
|
||||
|
||||
//TODO support this without the semicolon after the lambda
|
||||
assert_ast! {
|
||||
r#"fn wahoo() { let a = 10; \(x) { x + a }; }
|
||||
r#"fn wahoo() { let a = 10; \(x) { x + a } };
|
||||
wahoo()(3) "#,
|
||||
vec![
|
||||
fn_decl(Signature { name: rc("wahoo"), operator: false, type_anno: None, params: vec![] },
|
||||
@ -517,7 +501,7 @@ fn complex_lambdas() {
|
||||
|
||||
#[test]
|
||||
fn reserved_words() {
|
||||
assert_fail!("module::item::call()", "error at 1:7: expected ['a' ..= 'z' | 'A' ..= 'Z' | '_']");
|
||||
assert_fail!("module::item::call()", "Expected an identifier, got Colon");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -781,7 +765,7 @@ fn functions() {
|
||||
]
|
||||
})
|
||||
);
|
||||
assert_fail!("a(b,,c)","error at 1:5: expected one of \"(\", \".\", \"0b\", \"0x\", \"[\", \"\\\"\", \"_\", \"false\", \"for\", \"if\", \"true\", \"while\", ['+' | '-' | '!'], ['0' ..= '9'], ['a' ..= 'z' | 'A' ..= 'Z' | '_'], r#\"\\\"#");
|
||||
assert_fail!("a(b,,c)", "Expected a literal expression, got Comma");
|
||||
|
||||
assert_ast!(
|
||||
"fn a(b, c: Int): Int",
|
||||
@ -802,19 +786,6 @@ fn functions() {
|
||||
type_anno: Some(TypeIdentifier::Singleton(TypeSingletonName { name: rc("Int"), params: vec![] })),
|
||||
})))]
|
||||
);
|
||||
|
||||
let source = r#"
|
||||
fn some_function() {
|
||||
|
||||
}"#;
|
||||
|
||||
assert_ast!(
|
||||
source,
|
||||
vec![fn_decl(
|
||||
Signature { name: rc("some_function"), operator: false, type_anno: None, params: vec![] },
|
||||
vec![].into()
|
||||
)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -824,9 +795,7 @@ fn max_function_params() {
|
||||
write!(buf, "a{}, ", n).unwrap();
|
||||
}
|
||||
write!(buf, ") {{ return 20 }}").unwrap();
|
||||
//assert_fail!(&buf, "A function cannot have more than 255 arguments");
|
||||
//TODO better errors again
|
||||
assert_fail!(&buf, "error at 1:1439: expected ['a' ..= 'z' | 'A' ..= 'Z' | '_']");
|
||||
assert_fail!(&buf, "A function cannot have more than 255 arguments");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -914,43 +883,44 @@ fn interface() {
|
||||
|
||||
#[test]
|
||||
fn impls() {
|
||||
use Declaration::{FuncDecl, Impl};
|
||||
|
||||
let block = vec![
|
||||
FuncDecl(
|
||||
Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None },
|
||||
vec![].into(),
|
||||
),
|
||||
FuncDecl(
|
||||
Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None },
|
||||
vec![].into(),
|
||||
),
|
||||
];
|
||||
use Declaration::{FuncSig, Impl};
|
||||
|
||||
assert_ast!(
|
||||
"impl Heh { fn yolo() { }; fn swagg() { } }",
|
||||
vec![decl(Impl { type_name: ty_simple("Heh"), interface_name: None, block: block.clone() })]
|
||||
"impl Heh { fn yolo(); fn swagg(); }",
|
||||
vec![decl(Impl {
|
||||
type_name: ty_simple("Heh"),
|
||||
interface_name: None,
|
||||
block: vec![
|
||||
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
|
||||
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
|
||||
]
|
||||
})]
|
||||
);
|
||||
|
||||
//TODO `"impl Heh<X> { fn yolo() { }; fn swagg() { }; }"` ought to work
|
||||
assert_ast!(
|
||||
"impl Heh<X> { fn yolo() { }; fn swagg() { } }",
|
||||
"impl Heh<X> { fn yolo(); fn swagg(); }",
|
||||
vec![decl(Impl {
|
||||
type_name: TypeIdentifier::Singleton(TypeSingletonName {
|
||||
name: rc("Heh"),
|
||||
params: vec![ty_simple("X")]
|
||||
}),
|
||||
interface_name: None,
|
||||
block: block.clone(),
|
||||
block: vec![
|
||||
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
|
||||
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
|
||||
]
|
||||
})]
|
||||
);
|
||||
|
||||
assert_ast!(
|
||||
"impl Heh for Saraz { fn yolo() {}; fn swagg() {} }",
|
||||
"impl Heh for Saraz { fn yolo(); fn swagg(); }",
|
||||
vec![decl(Impl {
|
||||
type_name: ty_simple("Saraz"),
|
||||
interface_name: Some(TypeSingletonName { name: rc("Heh"), params: vec![] }),
|
||||
block: block.clone(),
|
||||
block: vec![
|
||||
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
|
||||
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
|
||||
]
|
||||
})]
|
||||
);
|
||||
|
||||
@ -1178,7 +1148,7 @@ fn pattern_matching() {
|
||||
);
|
||||
|
||||
assert_expr!(
|
||||
"if x { is 1 then 5; else 20 }",
|
||||
"if x { is 1 then 5, else 20 }",
|
||||
expr(IfExpression {
|
||||
discriminator: Some(bx(expr(Value(qn!(x))))),
|
||||
body: bx(IfExpressionBody::CondList(vec![
|
||||
@ -1214,7 +1184,7 @@ fn pattern_matching() {
|
||||
assert_expr! {
|
||||
r#"
|
||||
if (45, "panda", false, 2.2) {
|
||||
is (49, "pablo", _, 28.4) then "no"
|
||||
is (49, "pablo", _, 28.4) then "no"
|
||||
is (_, "panda", _, -2.2) then "yes"
|
||||
is _ then "maybe"
|
||||
}"#,
|
||||
@ -1293,62 +1263,3 @@ fn flow_control() {
|
||||
)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn blocks() {
|
||||
use ExpressionKind::*;
|
||||
|
||||
let cases = ["{ a }", "{ a; }", "{a}", "{ a\n }", "{ a\n\n }", "{ a;\n\n; }"];
|
||||
|
||||
let mut parser = Parser::new();
|
||||
for case in cases.iter() {
|
||||
let block = schala_parser::block(case, &mut parser);
|
||||
assert_eq!(block.unwrap(), vec![exst(Value(qn!(a)))].into());
|
||||
}
|
||||
|
||||
let source = r#"{
|
||||
fn quah() {
|
||||
fn foo() { }
|
||||
}
|
||||
}"#;
|
||||
let block = schala_parser::block(source, &mut parser);
|
||||
assert_eq!(
|
||||
block.unwrap(),
|
||||
vec![decl(Declaration::FuncDecl(
|
||||
Signature { name: rc("quah"), operator: false, params: vec![], type_anno: None },
|
||||
vec![decl(Declaration::FuncDecl(
|
||||
Signature { name: rc("foo"), operator: false, params: vec![], type_anno: None },
|
||||
vec![].into(),
|
||||
))]
|
||||
.into()
|
||||
))]
|
||||
.into()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn comments() {
|
||||
use ExpressionKind::*;
|
||||
|
||||
let source = "1 + /* hella /* bro */ */ 2";
|
||||
assert_expr!(source, binop("+", expr(NatLiteral(1)), expr(NatLiteral(2))));
|
||||
|
||||
//TODO make sure this error message makes sense
|
||||
let source = "1 + /* hella /* bro */ 2";
|
||||
assert_fail_expr!(source, "foo");
|
||||
|
||||
let source = "1 + /* hella */ bro */ 2";
|
||||
assert_fail_expr!(source, binop("+", expr(NatLiteral(1)), expr(NatLiteral(2))));
|
||||
|
||||
let source = "5//no man\n";
|
||||
assert_ast!(source, vec![exst(NatLiteral(5))]);
|
||||
}
|
||||
|
||||
//TODO support backtick operators like this
|
||||
/*
|
||||
#[test]
|
||||
fn backtick_operators() {
|
||||
let output = token_kinds("1 `plus` 2");
|
||||
assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]);
|
||||
}
|
||||
*/
|
||||
|
@ -247,6 +247,8 @@ impl<'a, 'b> Reducer<'a, 'b> {
|
||||
let mut alternatives = vec![];
|
||||
for arm in condition_arms {
|
||||
match arm.condition {
|
||||
ast::Condition::Expression(ref _expr) =>
|
||||
return Expression::ReductionError("case-expression".to_string()),
|
||||
ast::Condition::Pattern(ref pat) => {
|
||||
let alt = Alternative {
|
||||
pattern: match pat.reduce(self.symbol_table) {
|
||||
|
@ -5,7 +5,7 @@ use schala_repl::{
|
||||
use stopwatch::Stopwatch;
|
||||
|
||||
use crate::{
|
||||
error::SchalaError, parsing, reduced_ir, symbol_table, tree_walk_eval, type_inference,
|
||||
error::SchalaError, parsing, reduced_ir, symbol_table, tokenizing, tree_walk_eval, type_inference,
|
||||
};
|
||||
|
||||
/// All the state necessary to parse and execute a Schala program are stored in this struct.
|
||||
@ -19,7 +19,7 @@ pub struct Schala<'a> {
|
||||
/// Contains information for type-checking
|
||||
type_context: type_inference::TypeContext,
|
||||
/// Schala Parser
|
||||
active_parser: parsing::new::Parser,
|
||||
active_parser: parsing::Parser,
|
||||
|
||||
/// Execution state for AST-walking interpreter
|
||||
eval_state: tree_walk_eval::State<'a>,
|
||||
@ -47,7 +47,7 @@ impl<'a> Schala<'a> {
|
||||
source_reference: SourceReference::new(),
|
||||
symbol_table: symbol_table::SymbolTable::new(),
|
||||
type_context: type_inference::TypeContext::new(),
|
||||
active_parser: parsing::new::Parser::new(),
|
||||
active_parser: parsing::Parser::new(),
|
||||
eval_state: tree_walk_eval::State::new(),
|
||||
timings: Vec::new(),
|
||||
}
|
||||
@ -74,10 +74,18 @@ impl<'a> Schala<'a> {
|
||||
self.timings = vec![];
|
||||
let sw = Stopwatch::start_new();
|
||||
|
||||
self.source_reference.load_new_source(source);
|
||||
// 1st stage - tokenization
|
||||
// TODO tokenize should return its own error type
|
||||
let tokens = tokenizing::tokenize(source);
|
||||
if let Some(err) = SchalaError::from_tokens(&tokens) {
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
//2nd stage - parsing
|
||||
self.active_parser.add_new_tokens(tokens);
|
||||
let ast = self
|
||||
.active_parser
|
||||
.parse(source)
|
||||
.parse()
|
||||
.map_err(|err| SchalaError::from_parse_error(err, &self.source_reference))?;
|
||||
self.timings.push(("parsing", sw.elapsed()));
|
||||
|
||||
@ -114,50 +122,31 @@ impl<'a> Schala<'a> {
|
||||
|
||||
/// Represents lines of source code
|
||||
pub(crate) struct SourceReference {
|
||||
last_source: Option<String>,
|
||||
/// Offsets in *bytes* (not chars) representing a newline character
|
||||
newline_offsets: Vec<usize>,
|
||||
lines: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl SourceReference {
|
||||
pub(crate) fn new() -> SourceReference {
|
||||
SourceReference { last_source: None, newline_offsets: vec![] }
|
||||
fn new() -> SourceReference {
|
||||
SourceReference { lines: None }
|
||||
}
|
||||
|
||||
pub(crate) fn load_new_source(&mut self, source: &str) {
|
||||
self.newline_offsets = vec![];
|
||||
for (offset, ch) in source.as_bytes().iter().enumerate() {
|
||||
if *ch == ('\n' as u8) {
|
||||
self.newline_offsets.push(offset);
|
||||
}
|
||||
}
|
||||
self.last_source = Some(source.to_string());
|
||||
fn load_new_source(&mut self, source: &str) {
|
||||
//TODO this is a lot of heap allocations - maybe there's a way to make it more efficient?
|
||||
self.lines = Some(source.lines().map(|s| s.to_string()).collect());
|
||||
}
|
||||
|
||||
// (line_start, line_num, the string itself)
|
||||
pub fn get_line(&self, line: usize) -> (usize, usize, String) {
|
||||
if self.newline_offsets.is_empty() {
|
||||
return (0, 0, self.last_source.as_ref().cloned().unwrap());
|
||||
}
|
||||
|
||||
//TODO make sure this is utf8-safe
|
||||
let start_idx = match self.newline_offsets.binary_search(&line) {
|
||||
Ok(index) | Err(index) => index,
|
||||
};
|
||||
|
||||
let last_source = self.last_source.as_ref().unwrap();
|
||||
|
||||
let start = self.newline_offsets[start_idx];
|
||||
let end = self.newline_offsets.get(start_idx + 1).cloned().unwrap_or_else(|| last_source.len());
|
||||
|
||||
let slice = &last_source.as_bytes()[start..end];
|
||||
(start, start_idx, std::str::from_utf8(slice).unwrap().to_string())
|
||||
pub fn get_line(&self, line: usize) -> String {
|
||||
self.lines
|
||||
.as_ref()
|
||||
.and_then(|x| x.get(line).map(|s| s.to_string()))
|
||||
.unwrap_or_else(|| "NO LINE FOUND".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) enum Stage {
|
||||
Tokenizing,
|
||||
Parsing,
|
||||
Symbols,
|
||||
ScopeResolution,
|
||||
@ -167,7 +156,7 @@ pub(crate) enum Stage {
|
||||
}
|
||||
|
||||
fn stage_names() -> Vec<&'static str> {
|
||||
vec!["parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
|
||||
vec!["tokenizing", "parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
|
||||
}
|
||||
|
||||
#[derive(Default, Clone)]
|
||||
@ -188,6 +177,7 @@ impl<'a> ProgrammingLanguageInterface for Schala<'a> {
|
||||
|
||||
fn run_computation(&mut self, request: ComputationRequest<Self::Config>) -> ComputationResponse {
|
||||
let ComputationRequest { source, debug_requests: _, config: _ } = request;
|
||||
self.source_reference.load_new_source(source);
|
||||
let sw = Stopwatch::start_new();
|
||||
|
||||
let main_output =
|
||||
|
@ -10,7 +10,7 @@ use crate::{
|
||||
ast,
|
||||
ast::ItemId,
|
||||
builtin::Builtin,
|
||||
parsing::Location,
|
||||
tokenizing::Location,
|
||||
type_inference::{TypeContext, TypeId},
|
||||
};
|
||||
|
||||
|
@ -11,7 +11,7 @@ use crate::{
|
||||
TypeSingletonName, Variant, VariantKind, AST,
|
||||
},
|
||||
builtin::Builtin,
|
||||
parsing::Location,
|
||||
tokenizing::Location,
|
||||
type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder},
|
||||
};
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
use assert_matches::assert_matches;
|
||||
|
||||
use super::*;
|
||||
use crate::util::quick_ast;
|
||||
use crate::{tokenizing::Location, util::quick_ast};
|
||||
|
||||
fn add_symbols(src: &str) -> (SymbolTable, Result<(), Vec<SymbolError>>) {
|
||||
let ast = quick_ast(src);
|
||||
@ -79,11 +79,9 @@ fn no_type_definition_duplicates() {
|
||||
let err = &errs[0];
|
||||
|
||||
match err {
|
||||
SymbolError::DuplicateName { location: _, prev_name } => {
|
||||
SymbolError::DuplicateName { location, prev_name } => {
|
||||
assert_eq!(prev_name, &Fqsn::from_strs(&["Food"]));
|
||||
|
||||
//TODO restore this Location test
|
||||
//assert_eq!(location, &Location { line_num: 2, char_num: 2 });
|
||||
assert_eq!(location, &Location { line_num: 2, char_num: 2 });
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
@ -137,7 +135,7 @@ fn dont_falsely_detect_duplicates() {
|
||||
let a = 40;
|
||||
77
|
||||
}
|
||||
let q = 39
|
||||
let q = 39;
|
||||
"#;
|
||||
let (symbols, _) = add_symbols(source);
|
||||
|
||||
@ -173,8 +171,7 @@ fn second_inner_func() {
|
||||
}
|
||||
|
||||
inner_func(x)
|
||||
}
|
||||
"#;
|
||||
}"#;
|
||||
let (symbols, _) = add_symbols(source);
|
||||
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func"])).is_some());
|
||||
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func", "inner_func"])).is_some());
|
||||
@ -190,8 +187,7 @@ inner_func(x)
|
||||
fn enclosing_scopes_3() {
|
||||
let source = r#"
|
||||
fn outer_func(x) {
|
||||
|
||||
fn inner_func(arg) {
|
||||
fn inner_func(arg) {
|
||||
arg
|
||||
}
|
||||
|
||||
|
460
schala-lang/language/src/tokenizing.rs
Normal file
460
schala-lang/language/src/tokenizing.rs
Normal file
@ -0,0 +1,460 @@
|
||||
#![allow(clippy::upper_case_acronyms)]
|
||||
|
||||
use std::{
|
||||
convert::{TryFrom, TryInto},
|
||||
fmt,
|
||||
iter::{Iterator, Peekable},
|
||||
rc::Rc,
|
||||
};
|
||||
|
||||
use itertools::Itertools;
|
||||
|
||||
/// A position within a source file, expressed as a line number and a character
/// offset within that line.
///
/// The widths of the two fields cap a source file at 2^32 lines of at most
/// 2^16 characters each, which should be plenty.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct Location {
    pub(crate) line_num: u32,
    pub(crate) char_num: u16,
}

impl fmt::Display for Location {
    /// Renders the location as `line:char`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Location { line_num, char_num } = self;
        write!(f, "{}:{}", line_num, char_num)
    }
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum TokenKind {
|
||||
Newline,
|
||||
Semicolon,
|
||||
|
||||
LParen,
|
||||
RParen,
|
||||
LSquareBracket,
|
||||
RSquareBracket,
|
||||
LAngleBracket,
|
||||
RAngleBracket,
|
||||
LCurlyBrace,
|
||||
RCurlyBrace,
|
||||
Pipe,
|
||||
Backslash,
|
||||
AtSign,
|
||||
|
||||
Comma,
|
||||
Period,
|
||||
Colon,
|
||||
Underscore,
|
||||
Slash,
|
||||
Equals,
|
||||
|
||||
Operator(Rc<String>),
|
||||
DigitGroup(Rc<String>),
|
||||
HexLiteral(Rc<String>),
|
||||
BinNumberSigil,
|
||||
StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
|
||||
Identifier(Rc<String>),
|
||||
Keyword(Kw),
|
||||
|
||||
EOF,
|
||||
|
||||
Error(String),
|
||||
}
|
||||
use self::TokenKind::*;
|
||||
|
||||
impl fmt::Display for TokenKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
&Operator(ref s) => write!(f, "Operator({})", **s),
|
||||
&DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
|
||||
&HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
|
||||
&StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s),
|
||||
&Identifier(ref s) => write!(f, "Identifier({})", s),
|
||||
&Error(ref s) => write!(f, "Error({})", s),
|
||||
other => write!(f, "{:?}", other),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The reserved words of the language.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kw {
    If,
    Then,
    Else,
    Is,
    Func,
    For,
    While,
    Let,
    In,
    Mut,
    Return,
    Continue,
    Break,
    Alias,
    Type,
    SelfType,
    SelfIdent,
    Interface,
    Impl,
    True,
    False,
    Module,
    Import,
}

impl TryFrom<&str> for Kw {
    type Error = ();

    /// Maps the exact (case-sensitive) spelling of a keyword to its [`Kw`] variant;
    /// any other string yields `Err(())`.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        let keyword = match value {
            "if" => Self::If,
            "then" => Self::Then,
            "else" => Self::Else,
            "is" => Self::Is,
            "fn" => Self::Func,
            "for" => Self::For,
            "while" => Self::While,
            "let" => Self::Let,
            "in" => Self::In,
            "mut" => Self::Mut,
            "return" => Self::Return,
            "break" => Self::Break,
            "continue" => Self::Continue,
            "alias" => Self::Alias,
            "type" => Self::Type,
            "Self" => Self::SelfType,
            "self" => Self::SelfIdent,
            "interface" => Self::Interface,
            "impl" => Self::Impl,
            "true" => Self::True,
            "false" => Self::False,
            "module" => Self::Module,
            "import" => Self::Import,
            _ => return Err(()),
        };
        Ok(keyword)
    }
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Token {
|
||||
pub kind: TokenKind,
|
||||
pub(crate) location: Location,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn to_string_with_metadata(&self) -> String {
|
||||
format!("{}({})", self.kind, self.location)
|
||||
}
|
||||
|
||||
pub fn get_kind(&self) -> TokenKind {
|
||||
self.kind.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Characters that may appear in an operator token.
const OPERATOR_CHARS: [char; 17] =
    ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];

/// Returns `true` when `c` is one of [`OPERATOR_CHARS`].
fn is_operator(c: &char) -> bool {
    OPERATOR_CHARS.contains(c)
}
|
||||
|
||||
type CharData = (usize, usize, char);
|
||||
|
||||
pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
let mut tokens: Vec<Token> = Vec::new();
|
||||
|
||||
let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
|
||||
.flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch)))
|
||||
.peekable();
|
||||
|
||||
while let Some((line_num, char_num, c)) = input.next() {
|
||||
let cur_tok_kind = match c {
|
||||
'/' => match input.peek().map(|t| t.2) {
|
||||
Some('/') => {
|
||||
for (_, _, c) in input.by_ref() {
|
||||
if c == '\n' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
Some('*') => {
|
||||
input.next();
|
||||
let mut comment_level = 1;
|
||||
while let Some((_, _, c)) = input.next() {
|
||||
if c == '*' && input.peek().map(|t| t.2) == Some('/') {
|
||||
input.next();
|
||||
comment_level -= 1;
|
||||
} else if c == '/' && input.peek().map(|t| t.2) == Some('*') {
|
||||
input.next();
|
||||
comment_level += 1;
|
||||
}
|
||||
if comment_level == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if comment_level != 0 {
|
||||
Error("Unclosed comment".to_string())
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
_ => Slash,
|
||||
},
|
||||
c if c.is_whitespace() && c != '\n' => continue,
|
||||
'\n' => Newline,
|
||||
';' => Semicolon,
|
||||
':' => Colon,
|
||||
',' => Comma,
|
||||
'(' => LParen,
|
||||
')' => RParen,
|
||||
'{' => LCurlyBrace,
|
||||
'}' => RCurlyBrace,
|
||||
'[' => LSquareBracket,
|
||||
']' => RSquareBracket,
|
||||
'"' => handle_quote(&mut input, None),
|
||||
'\\' => Backslash,
|
||||
'@' => AtSign,
|
||||
c if c.is_digit(10) => handle_digit(c, &mut input),
|
||||
c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input),
|
||||
c if is_operator(&c) => handle_operator(c, &mut input),
|
||||
unknown => Error(format!("Unexpected character: {}", unknown)),
|
||||
};
|
||||
let location =
|
||||
Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
|
||||
tokens.push(Token { kind: cur_tok_kind, location });
|
||||
}
|
||||
tokens
|
||||
}
|
||||
|
||||
fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
|
||||
let next_ch = input.peek().map(|&(_, _, c)| c);
|
||||
|
||||
if c == '0' && next_ch == Some('x') {
|
||||
input.next();
|
||||
let rest: String = input
|
||||
.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
|
||||
.map(|(_, _, c)| c)
|
||||
.collect();
|
||||
HexLiteral(Rc::new(rest))
|
||||
} else if c == '0' && next_ch == Some('b') {
|
||||
input.next();
|
||||
BinNumberSigil
|
||||
} else {
|
||||
let mut buf = c.to_string();
|
||||
buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
|
||||
DigitGroup(Rc::new(buf))
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_quote(
|
||||
input: &mut Peekable<impl Iterator<Item = CharData>>,
|
||||
quote_prefix: Option<&str>,
|
||||
) -> TokenKind {
|
||||
let mut buf = String::new();
|
||||
loop {
|
||||
match input.next().map(|(_, _, c)| c) {
|
||||
Some('"') => break,
|
||||
Some('\\') => {
|
||||
let next = input.peek().map(|&(_, _, c)| c);
|
||||
if next == Some('n') {
|
||||
input.next();
|
||||
buf.push('\n')
|
||||
} else if next == Some('"') {
|
||||
input.next();
|
||||
buf.push('"');
|
||||
} else if next == Some('t') {
|
||||
input.next();
|
||||
buf.push('\t');
|
||||
}
|
||||
}
|
||||
Some(c) => buf.push(c),
|
||||
None => return TokenKind::Error("Unclosed string".to_string()),
|
||||
}
|
||||
}
|
||||
TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
|
||||
}
|
||||
|
||||
fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
|
||||
let mut buf = String::new();
|
||||
buf.push(c);
|
||||
let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
|
||||
if c == '_' && next_is_alphabetic {
|
||||
return TokenKind::Underscore;
|
||||
}
|
||||
|
||||
loop {
|
||||
match input.peek().map(|&(_, _, c)| c) {
|
||||
Some(c) if c == '"' => {
|
||||
input.next();
|
||||
return handle_quote(input, Some(&buf));
|
||||
}
|
||||
Some(c) if c.is_alphanumeric() || c == '_' => {
|
||||
input.next();
|
||||
buf.push(c);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
match Kw::try_from(buf.as_str()) {
|
||||
Ok(kw) => TokenKind::Keyword(kw),
|
||||
Err(()) => TokenKind::Identifier(Rc::new(buf)),
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
|
||||
match c {
|
||||
'<' | '>' | '|' | '.' | '=' => {
|
||||
let next = &input.peek().map(|&(_, _, c)| c);
|
||||
let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
|
||||
if !next_is_op {
|
||||
return match c {
|
||||
'<' => LAngleBracket,
|
||||
'>' => RAngleBracket,
|
||||
'|' => Pipe,
|
||||
'.' => Period,
|
||||
'=' => Equals,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
|
||||
let mut buf = String::new();
|
||||
|
||||
if c == '`' {
|
||||
loop {
|
||||
match input.peek().map(|&(_, _, c)| c) {
|
||||
Some(c) if c.is_alphabetic() || c == '_' => {
|
||||
input.next();
|
||||
buf.push(c);
|
||||
}
|
||||
Some('`') => {
|
||||
input.next();
|
||||
break;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
buf.push(c);
|
||||
loop {
|
||||
match input.peek().map(|&(_, _, c)| c) {
|
||||
Some(c) if is_operator(&c) => {
|
||||
input.next();
|
||||
buf.push(c);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
TokenKind::Operator(Rc::new(buf))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod schala_tokenizer_tests {
    use super::{Kw::*, *};

    // Shorthand for the `DigitGroup` token holding the given text.
    macro_rules! digit {
        ($text:expr) => {
            DigitGroup(Rc::new($text.to_string()))
        };
    }

    // Shorthand for the `Identifier` token holding the given text.
    macro_rules! ident {
        ($text:expr) => {
            Identifier(Rc::new($text.to_string()))
        };
    }

    // Shorthand for the `Operator` token holding the given text.
    macro_rules! op {
        ($text:expr) => {
            Operator(Rc::new($text.to_string()))
        };
    }

    /// Tokenizes `input` and returns only the kind of each resulting token.
    fn token_kinds(input: &str) -> Vec<TokenKind> {
        let mut kinds = Vec::new();
        for token in tokenize(input) {
            kinds.push(token.kind);
        }
        kinds
    }

    #[test]
    fn tokens() {
        let expected = vec![
            Keyword(Let),
            ident!("a"),
            Colon,
            ident!("A"),
            LAngleBracket,
            ident!("B"),
            RAngleBracket,
            Equals,
            ident!("c"),
            op!("++"),
            ident!("d"),
        ];
        assert_eq!(token_kinds("let a: A<B> = c ++ d"), expected);
    }

    #[test]
    fn underscores() {
        // An underscore between digits is its own token...
        assert_eq!(token_kinds("4_8"), vec![digit!("4"), Underscore, digit!("8")]);

        // ...but inside an identifier it is part of the name.
        assert_eq!(token_kinds("aba_yo"), vec![ident!("aba_yo")]);
    }

    #[test]
    fn comments() {
        // Properly nested block comments are dropped entirely.
        let nested = token_kinds("1 + /* hella /* bro */ */ 2");
        assert_eq!(nested, vec![digit!("1"), op!("+"), digit!("2")]);

        // A comment that is never closed yields an error token.
        let unclosed = token_kinds("1 + /* hella /* bro */ 2");
        assert_eq!(unclosed, vec![digit!("1"), op!("+"), Error("Unclosed comment".to_string())]);

        // TODO: undecided whether a stray `*/` should really lex like this.
        let stray_close = token_kinds("1 + /* hella */ bro */ 2");
        let expected = vec![digit!("1"), op!("+"), ident!("bro"), op!("*"), Slash, digit!("2")];
        assert_eq!(stray_close, expected);
    }

    #[test]
    fn backtick_operators() {
        assert_eq!(token_kinds("1 `plus` 2"), vec![digit!("1"), op!("plus"), digit!("2")]);
    }

    #[test]
    fn string_literals() {
        // Plain string literal, no prefix.
        let plain = vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }];
        assert_eq!(token_kinds(r#""some string""#), plain);

        // A prefix immediately before the opening quote is recorded separately.
        let prefixed = vec![StrLiteral {
            s: Rc::new("some bytestring".to_string()),
            prefix: Some(Rc::new("b".to_string())),
        }];
        assert_eq!(token_kinds(r#"b"some bytestring""#), prefixed);

        // Escape sequences are decoded into the characters they denote.
        let escaped =
            vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }];
        assert_eq!(token_kinds(r#""Do \n \" escapes work\t""#), escaped);
    }
}
|
@ -43,7 +43,7 @@ fn test_basic_eval() {
|
||||
|
||||
#[test]
|
||||
fn op_eval() {
|
||||
eval_assert("-13", "-13");
|
||||
eval_assert("- 13", "-13");
|
||||
eval_assert("10 - 2", "8");
|
||||
}
|
||||
|
||||
@ -96,7 +96,7 @@ trad()"#,
|
||||
);
|
||||
|
||||
let err =
|
||||
"No symbol found for name: QualifiedName { id: Id { idx: 9, t: PhantomData }, components: [\"a\"] }";
|
||||
"No symbol found for name: QualifiedName { id: Id { idx: 4, t: PhantomData }, components: [\"a\"] }";
|
||||
|
||||
eval_assert_failure(
|
||||
r#"
|
||||
@ -271,26 +271,26 @@ fn full_if_matching() {
|
||||
let source = r#"
|
||||
type Option<T> = Some(T) | None
|
||||
let a = Option::None
|
||||
if a { is Option::None then 4; is Option::Some(x) then x }
|
||||
if a { is Option::None then 4, is Option::Some(x) then x }
|
||||
"#;
|
||||
eval_assert(source, "4");
|
||||
|
||||
let source = r#"
|
||||
type Option<T> = Some(T) | None
|
||||
let sara = Option::Some(99)
|
||||
if sara { is Option::None then 1 + 3; is Option::Some(x) then x }
|
||||
if sara { is Option::None then 1 + 3, is Option::Some(x) then x }
|
||||
"#;
|
||||
eval_assert(source, "99");
|
||||
|
||||
let source = r#"
|
||||
let a = 10
|
||||
if a { is 10 then "x"; is 4 then "y" }
|
||||
if a { is 10 then "x", is 4 then "y" }
|
||||
"#;
|
||||
eval_assert(source, "\"x\"");
|
||||
|
||||
let source = r#"
|
||||
let a = 10
|
||||
if a { is 15 then "x"; is 10 then "y" }
|
||||
if a { is 15 then "x", is 10 then "y" }
|
||||
"#;
|
||||
eval_assert(source, "\"y\"");
|
||||
}
|
||||
@ -300,7 +300,7 @@ if a { is 15 then "x"; is 10 then "y" }
|
||||
fn string_pattern() {
|
||||
let source = r#"
|
||||
let a = "foo"
|
||||
if a { is "foo" then "x"; is _ then "y" }
|
||||
if a { is "foo" then "x", is _ then "y" }
|
||||
"#;
|
||||
eval_assert(source, "\"x\"");
|
||||
}
|
||||
@ -310,7 +310,7 @@ fn boolean_pattern() {
|
||||
let source = r#"
|
||||
let a = true
|
||||
if a {
|
||||
is true then "x"
|
||||
is true then "x",
|
||||
is false then "y"
|
||||
}
|
||||
"#;
|
||||
@ -321,7 +321,7 @@ if a {
|
||||
fn boolean_pattern_2() {
|
||||
let source = r#"
|
||||
let a = false
|
||||
if a { is true then "x"; is false then "y" }
|
||||
if a { is true then "x", is false then "y" }
|
||||
"#;
|
||||
eval_assert(source, "\"y\"");
|
||||
}
|
||||
@ -341,7 +341,7 @@ if Option::Some(10) {
|
||||
fn tuple_pattern() {
|
||||
let source = r#"
|
||||
if (1, 2) {
|
||||
is (1, x) then x;
|
||||
is (1, x) then x,
|
||||
is _ then 99
|
||||
}
|
||||
"#;
|
||||
@ -352,7 +352,7 @@ if (1, 2) {
|
||||
fn tuple_pattern_2() {
|
||||
let source = r#"
|
||||
if (1, 2) {
|
||||
is (10, x) then x
|
||||
is (10, x) then x,
|
||||
is (y, x) then x + y
|
||||
}
|
||||
"#;
|
||||
@ -363,7 +363,7 @@ if (1, 2) {
|
||||
fn tuple_pattern_3() {
|
||||
let source = r#"
|
||||
if (1, 5) {
|
||||
is (10, x) then x
|
||||
is (10, x) then x,
|
||||
is (1, x) then x
|
||||
}
|
||||
"#;
|
||||
@ -374,8 +374,8 @@ if (1, 5) {
|
||||
fn tuple_pattern_4() {
|
||||
let source = r#"
|
||||
if (1, 5) {
|
||||
is (10, x) then x
|
||||
is (1, x) then x
|
||||
is (10, x) then x,
|
||||
is (1, x) then x,
|
||||
}
|
||||
"#;
|
||||
eval_assert(source, "5");
|
||||
@ -390,21 +390,21 @@ let b = Stuff::Jugs(1, "haha")
|
||||
let c = Stuff::Mardok
|
||||
|
||||
let x = if a {
|
||||
is Stuff::Mulch(20) then "x"
|
||||
is Stuff::Mulch(20) then "x",
|
||||
is _ then "ERR"
|
||||
}
|
||||
|
||||
let y = if b {
|
||||
is Stuff::Mulch(n) then "ERR"
|
||||
is Stuff::Jugs(2, _) then "ERR"
|
||||
is Stuff::Jugs(1, s) then s
|
||||
is _ then "ERR"
|
||||
is Stuff::Mulch(n) then "ERR",
|
||||
is Stuff::Jugs(2, _) then "ERR",
|
||||
is Stuff::Jugs(1, s) then s,
|
||||
is _ then "ERR",
|
||||
}
|
||||
|
||||
let z = if c {
|
||||
is Stuff::Jugs(_, _) then "ERR"
|
||||
is Stuff::Mardok then "NIGH"
|
||||
is _ then "ERR"
|
||||
is Stuff::Jugs(_, _) then "ERR",
|
||||
is Stuff::Mardok then "NIGH",
|
||||
is _ then "ERR",
|
||||
}
|
||||
|
||||
(x, y, z)
|
||||
|
@ -52,8 +52,10 @@ where T: Hash + Eq
|
||||
/// Quickly create an AST from a string, with no error checking. For test use only
|
||||
#[cfg(test)]
|
||||
pub fn quick_ast(input: &str) -> crate::ast::AST {
|
||||
let mut parser = crate::parsing::new::Parser::new();
|
||||
let output = parser.parse(input);
|
||||
let tokens = crate::tokenizing::tokenize(input);
|
||||
let mut parser = crate::parsing::Parser::new();
|
||||
parser.add_new_tokens(tokens);
|
||||
let output = parser.parse();
|
||||
output.unwrap()
|
||||
}
|
||||
|
||||
|
@ -77,7 +77,7 @@ x is Some(t) // type bool
|
||||
|
||||
if x {
|
||||
is Some(t) => {
|
||||
}
|
||||
},
|
||||
is None => {
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user