Move maaru into separate crate

This commit is contained in:
greg
2018-03-20 23:29:56 -07:00
parent 795b4adc6b
commit 5f279cb400
8 changed files with 29 additions and 12 deletions

11
maaru/Cargo.toml Normal file
View File

@@ -0,0 +1,11 @@
[package]
name = "maaru-lang"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
itertools = "0.5.8"
take_mut = "0.1.3"
llvm-sys = "*"
schala-repl = { path = "../schala-repl" }

279
maaru/src/compilation.rs Normal file
View File

@@ -0,0 +1,279 @@
extern crate llvm_sys;
use std::collections::HashMap;
use self::llvm_sys::prelude::*;
use self::llvm_sys::{LLVMIntPredicate};
use parser::{AST, Statement, Function, Prototype, Expression, BinOp};
use schala_repl::LLVMCodeString;
use schala_repl::llvm_wrap as LLVMWrap;
/// Maps a source-level variable name to the LLVM value currently bound to it.
type VariableMap = HashMap<String, LLVMValueRef>;
/// Mutable state threaded through every `codegen` call while one AST is compiled.
struct CompilationData {
    /// LLVM context that all types and basic blocks are created in.
    context: LLVMContextRef,
    /// Module that generated functions are added to.
    module: LLVMModuleRef,
    /// Instruction builder; its insertion point moves as code is generated.
    builder: LLVMBuilderRef,
    /// Name -> value bindings visible to `Expression::Variable` lookups.
    variables: VariableMap,
    /// The synthetic `main` function that top-level code is emitted into.
    main_function: LLVMValueRef,
    /// Set to the function being generated while a `Function` body is compiled, `None` otherwise.
    current_function: Option<LLVMValueRef>,
}
/// Compiles a whole AST into the textual form of an LLVM module.
///
/// A context, module and builder are created, a zero-argument `main` returning a
/// 64-bit integer is added to the module, and the AST is generated into `main`'s
/// `entry` block. The value produced by the AST becomes `main`'s return value.
/// The module is printed to a string before the builder, module and context are
/// disposed.
pub fn compile_ast(ast: AST) -> LLVMCodeString {
    println!("Compiling!");

    let context = LLVMWrap::create_context();
    let module = LLVMWrap::module_create_with_name("example module");
    let builder = LLVMWrap::CreateBuilderInContext(context);

    let int64_type = LLVMWrap::Int64TypeInContext(context);
    let main_type = LLVMWrap::FunctionType(int64_type, Vec::new(), false);
    let main_function: LLVMValueRef = LLVMWrap::AddFunction(module, "main", main_type);

    let mut data = CompilationData {
        context,
        module,
        builder,
        variables: HashMap::new(),
        main_function,
        current_function: None,
    };

    let entry = LLVMWrap::AppendBasicBlockInContext(data.context, data.main_function, "entry");
    LLVMWrap::PositionBuilderAtEnd(builder, entry);

    let program_value = ast.codegen(&mut data);
    LLVMWrap::BuildRet(builder, program_value);

    let ir_text = LLVMWrap::PrintModuleToString(module);

    // Clean up. Values created in the context mostly get cleaned up there.
    LLVMWrap::DisposeBuilder(builder);
    LLVMWrap::DisposeModule(module);
    LLVMWrap::ContextDispose(context);

    LLVMCodeString(ir_text)
}
trait CodeGen {
fn codegen(&self, &mut CompilationData) -> LLVMValueRef;
}
impl CodeGen for AST {
    /// Generates every statement in order and yields the value of the last one.
    /// An empty AST yields the 64-bit constant 0.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        let int_type = LLVMWrap::Int64TypeInContext(data.context);
        let zero = LLVMWrap::ConstInt(int_type, 0, false);
        self.iter()
            .fold(zero, |_previous, statement| statement.codegen(data))
    }
}
impl CodeGen for Statement {
fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
use self::Statement::*;
match self {
&ExprNode(ref expr) => expr.codegen(data),
&FuncDefNode(ref func) => func.codegen(data),
}
}
}
impl CodeGen for Function {
    /// Emits an LLVM function for this definition: the prototype is generated first,
    /// then the body is generated into a fresh `entry` block of that function.
    ///
    /// Returns the value of the last body statement (constant 0 for an empty body),
    /// not the function itself. On exit the builder is positioned at the end of the
    /// first basic block of `main`.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        /* should have a check here for function already being defined */
        let function = self.prototype.codegen(data);
        let ref body = self.body;
        data.current_function = Some(function);
        let return_type = LLVMWrap::Int64TypeInContext(data.context);
        // Value returned when the body contains no statements.
        let mut ret = LLVMWrap::ConstInt(return_type, 0, false);
        let block = LLVMWrap::AppendBasicBlockInContext(data.context, function, "entry");
        LLVMWrap::PositionBuilderAtEnd(data.builder, block);
        // Insert function params into variables so the body can refer to them by name.
        // NOTE(review): these entries are not removed again in this block, so they stay
        // in `data.variables` after the function has been generated; confirm intended.
        for value in LLVMWrap::GetParams(function) {
            let name = LLVMWrap::GetValueName(value);
            data.variables.insert(name, value);
        }
        for expr in body {
            ret = expr.codegen(data);
        }
        LLVMWrap::BuildRet(data.builder, ret);
        // Get the first basic block of main and resume emitting top-level code there.
        let main_bb = LLVMWrap::GetBasicBlocks(data.main_function).get(0).expect("Couldn't get first block of main").clone();
        LLVMWrap::PositionBuilderAtEnd(data.builder, main_bb);
        data.current_function = None;
        ret
    }
}
impl CodeGen for Prototype {
    /// Declares the function in the module and names its LLVM parameters.
    ///
    /// Every parameter and the return value are typed as 64-bit integers. Each LLVM
    /// parameter is given the name of the prototype parameter at the same position.
    /// Returns the newly added function.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        let return_type = LLVMWrap::Int64TypeInContext(data.context);
        let arguments: Vec<LLVMTypeRef> = (0..self.parameters.len())
            .map(|_| LLVMWrap::Int64TypeInContext(data.context))
            .collect();

        let function_type = LLVMWrap::FunctionType(return_type, arguments, false);
        let function = LLVMWrap::AddFunction(data.module, &*self.name, function_type);

        let llvm_params = LLVMWrap::GetParams(function);
        for (index, llvm_param) in llvm_params.iter().enumerate() {
            let name = self.parameters.get(index).expect(&format!("Failed this check at index {}", index));
            LLVMWrap::SetValueName(*llvm_param, name);
        }

        function
    }
}
impl CodeGen for Expression {
    /// Emits code for one expression and returns the LLVM value it evaluates to.
    ///
    /// Handled variants: `Variable`, `BinExp` (assignment and ordinary operators),
    /// `Number`, `Conditional` and `Block`. Any other variant is printed and then
    /// hits `unimplemented!()`.
    ///
    /// Panics if a variable is not present in `data.variables`, or if the left-hand
    /// side of an assignment is not a plain variable.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        use self::BinOp::*;
        use self::Expression::*;
        let int_type = LLVMWrap::Int64TypeInContext(data.context);
        let zero = LLVMWrap::ConstInt(int_type, 0, false);
        match *self {
            Variable(ref name) => *data.variables.get(&**name).expect(&format!("Can't find variable {}", name)),
            // Assignment rebinds the name to the newly generated value; nothing is stored to memory.
            BinExp(Assign, ref left, ref right) => {
                if let Variable(ref name) = **left {
                    let new_value = right.codegen(data);
                    data.variables.insert((**name).clone(), new_value);
                    new_value
                } else {
                    panic!("Bad variable assignment")
                }
            }
            // Any other operator: generate the left operand, then the right, then combine.
            BinExp(ref op, ref left, ref right) => {
                let lhs = left.codegen(data);
                let rhs = right.codegen(data);
                op.codegen_with_ops(data, lhs, rhs)
            }
            Number(ref n) => {
                // The floating-point literal is converted to an unsigned 64-bit integer constant.
                let native_val = *n as u64;
                let int_value: LLVMValueRef = LLVMWrap::ConstInt(int_type, native_val, false);
                int_value
            }
            Conditional(ref test, ref then_expr, ref else_expr) => {
                let condition_value = test.codegen(data);
                // The condition counts as true when it is not equal to zero.
                let is_nonzero =
                    LLVMWrap::BuildICmp(data.builder,
                                        LLVMIntPredicate::LLVMIntNE,
                                        condition_value,
                                        zero,
                                        "ifcond");
                // The function that currently contains the builder's insertion point.
                let func = LLVMWrap::GetBasicBlockParent(LLVMWrap::GetInsertBlock(data.builder));
                let mut then_block =
                    LLVMWrap::AppendBasicBlockInContext(data.context, func, "then_block");
                let mut else_block =
                    LLVMWrap::AppendBasicBlockInContext(data.context, func, "else_block");
                let merge_block =
                    LLVMWrap::AppendBasicBlockInContext(data.context, func, "ifcont");
                // add conditional branch to ifcond block
                LLVMWrap::BuildCondBr(data.builder, is_nonzero, then_block, else_block);
                // start inserting into then block
                LLVMWrap::PositionBuilderAtEnd(data.builder, then_block);
                // then-block codegen
                let then_return = then_expr.codegen(data);
                LLVMWrap::BuildBr(data.builder, merge_block);
                // update then block b/c recursive codegen() call may have changed the notion of
                // the current block
                then_block = LLVMWrap::GetInsertBlock(data.builder);
                // then do the same stuff again for the else branch
                //
                LLVMWrap::PositionBuilderAtEnd(data.builder, else_block);
                // A missing else branch evaluates to the constant 0.
                let else_return = match *else_expr {
                    Some(ref e) => e.codegen(data),
                    None => zero,
                };
                LLVMWrap::BuildBr(data.builder, merge_block);
                else_block = LLVMWrap::GetInsertBlock(data.builder);
                LLVMWrap::PositionBuilderAtEnd(data.builder, merge_block);
                // The phi node selects the branch value according to the predecessor block.
                let phi = LLVMWrap::BuildPhi(data.builder, int_type, "phinode");
                let values = vec![then_return, else_return];
                let blocks = vec![then_block, else_block];
                LLVMWrap::AddIncoming(phi, values, blocks);
                phi
            }
            // A block evaluates to its last expression, or to 0 when it is empty.
            Block(ref exprs) => {
                let mut ret = zero;
                for e in exprs.iter() {
                    ret = e.codegen(data);
                }
                ret
            }
            ref e => {
                println!("Unimplemented {:?}", e);
                unimplemented!()
            }
        }
    }
}
impl BinOp {
    /// Emits the instruction for this operator applied to two already-generated operands.
    ///
    /// `Less` and `Greater` produce a comparison result zero-extended to a 64-bit
    /// integer. Operators without an arm below cause a panic.
    fn codegen_with_ops(&self, data: &CompilationData, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef {
        use self::BinOp::*;
        // Expands to `$fnname(builder, lhs, rhs, name)` for the plain arithmetic builders.
        macro_rules! simple_binop {
            ($fnname: expr, $name: expr) => {
                $fnname(data.builder, lhs, rhs, $name)
            }
        }
        let int_type = LLVMWrap::Int64TypeInContext(data.context);
        match *self {
            Add => simple_binop!(LLVMWrap::BuildAdd, "addtemp"),
            Sub => simple_binop!(LLVMWrap::BuildSub, "subtemp"),
            Mul => simple_binop!(LLVMWrap::BuildMul, "multemp"),
            // NOTE(review): division is unsigned (UDiv) while remainder below is signed (SRem),
            // and the comparisons are unsigned (ULT/UGT); confirm which signedness is intended.
            Div => simple_binop!(LLVMWrap::BuildUDiv, "divtemp"),
            Mod => simple_binop!(LLVMWrap::BuildSRem, "remtemp"),
            Less => {
                let pred: LLVMValueRef =
                    LLVMWrap::BuildICmp(data.builder, LLVMIntPredicate::LLVMIntULT, lhs, rhs, "tmp");
                LLVMWrap::BuildZExt(data.builder, pred, int_type, "temp")
            }
            Greater => {
                let pred: LLVMValueRef =
                    LLVMWrap::BuildICmp(data.builder, LLVMIntPredicate::LLVMIntUGT, lhs, rhs, "tmp");
                LLVMWrap::BuildZExt(data.builder, pred, int_type, "temp")
            }
            ref unknown => panic!("Bad operator {:?}", unknown),
        }
    }
}

481
maaru/src/eval.rs Normal file
View File

@@ -0,0 +1,481 @@
extern crate take_mut;
use std::collections::HashMap;
use std::collections::VecDeque;
use parser::{AST, Statement, Expression, Function, Callable, BinOp};
use std::rc::Rc;
use std::io::{Write, Stdout, BufWriter};
use std::convert::From;
use parser::Expression::*;
use parser::Statement::*;
/// Result of one reduction step: the reduced node plus an optional side effect
/// for the caller to perform.
type Reduction<T> = (T, Option<SideEffect>);
/// A fully reduced value, in the form stored in an evaluator's variable table.
#[derive(Debug, Clone)]
enum ReducedValue {
    /// The null value. `Expression::Null` is irreducible, so it must be storable:
    /// without this variant `x = null` or a null call argument would panic.
    Null,
    StringLiteral(Rc<String>),
    ListLiteral(VecDeque<Expression>),
    StructLiteral(VecDeque<(Rc<String>, Expression)>),
    Number(f64),
    Lambda(Function),
}

impl From<ReducedValue> for Expression {
    /// Turns a stored value back into the expression it was reduced from.
    fn from(rv: ReducedValue) -> Expression {
        match rv {
            ReducedValue::Null => Expression::Null,
            ReducedValue::Number(n) => Expression::Number(n),
            ReducedValue::StringLiteral(n) => Expression::StringLiteral(n),
            ReducedValue::Lambda(f) => Expression::Lambda(f),
            ReducedValue::ListLiteral(items) => Expression::ListLiteral(items),
            ReducedValue::StructLiteral(items) => Expression::StructLiteral(items),
        }
    }
}

impl From<Expression> for ReducedValue {
    /// Converts an irreducible expression into its stored form.
    ///
    /// Panics if the expression is a variant that can still be reduced
    /// (variables, operators, calls, control flow, ...).
    fn from(rv: Expression) -> ReducedValue {
        match rv {
            Expression::Null => ReducedValue::Null,
            Expression::Number(n) => ReducedValue::Number(n),
            Expression::StringLiteral(n) => ReducedValue::StringLiteral(n),
            Expression::Lambda(f) => ReducedValue::Lambda(f),
            Expression::ListLiteral(items) => ReducedValue::ListLiteral(items),
            Expression::StructLiteral(items) => ReducedValue::StructLiteral(items),
            _ => panic!("trying to store a non-fully-reduced variable"),
        }
    }
}
/// Converts a number into a list index.
///
/// Returns `Some(i)` only when `f` is a non-negative whole number that fits in a
/// `usize`. Fractional, negative, NaN, infinite and too-large values give `None`.
fn get_indexer(f: f64) -> Option<usize> {
    // NaN and the infinities fail this test because their fractional part is NaN.
    let is_whole = f.fract() == 0.0;
    // `usize::MAX as f64` may round up to the next power of two, so the upper bound
    // is exclusive; this keeps the cast below from saturating (or, on old compilers,
    // from being undefined behaviour).
    let in_range = f >= 0.0 && f < ::std::usize::MAX as f64;
    if is_whole && in_range {
        Some(f as usize)
    } else {
        None
    }
}
/// An effect requested by a reduction step and carried out by `perform_side_effect`.
#[derive(Debug)]
enum SideEffect {
    /// Write the text, followed by a newline, to the evaluator's stdout.
    Print(String),
    /// Bind the name to the value in the evaluator's variable table.
    AddBinding(Rc<String>, ReducedValue),
}
/// Small-step evaluator holding one scope of variable bindings.
pub struct Evaluator<'a> {
    /// Enclosing scope, consulted when a name is not found in `variables`.
    parent: Option<&'a Evaluator<'a>>,
    /// Bindings defined in this scope.
    variables: HashMap<String, ReducedValue>,
    /// Buffered stdout used for `Print` side effects.
    stdout: BufWriter<Stdout>,
    /// When true, every reduction step prints a trace line.
    pub trace_evaluation: bool,
}
impl<'a> Evaluator<'a> {
    /// Creates an evaluator with no bindings of its own.
    ///
    /// `trace_evaluation` is copied from `parent` when there is one, and is `false`
    /// otherwise.
    pub fn new(parent: Option<&'a Evaluator>) -> Evaluator<'a> {
        let trace_evaluation = match parent {
            Some(outer) => outer.trace_evaluation,
            None => false,
        };
        Evaluator {
            parent: parent,
            variables: HashMap::new(),
            stdout: BufWriter::new(::std::io::stdout()),
            trace_evaluation: trace_evaluation,
        }
    }

    /// Fully reduces each statement in order and returns the display form of each result.
    pub fn run(&mut self, ast: AST) -> Vec<String> {
        let mut rendered = Vec::new();
        for statement in ast {
            let reduced = self.reduction_loop(statement);
            rendered.push(format!("{}", reduced));
        }
        rendered
    }

    /// Binds `var` to `value` in this scope, replacing any previous binding.
    fn add_binding(&mut self, var: String, value: ReducedValue) {
        self.variables.insert(var, value);
    }

    /// Looks `var` up in this scope first and then in the chain of parents.
    fn lookup_binding(&self, var: &str) -> Option<ReducedValue> {
        if let Some(value) = self.variables.get(var) {
            return Some(value.clone());
        }
        self.parent.and_then(|outer| outer.lookup_binding(var))
    }
}
/// Implemented by AST nodes that can report whether a reduction step still applies.
trait Evaluable {
    /// Returns true while the node can be reduced further.
    fn is_reducible(&self) -> bool;
}
impl Evaluable for Statement {
    /// A function definition is always reducible; an expression statement is
    /// reducible when its expression is.
    fn is_reducible(&self) -> bool {
        match *self {
            FuncDefNode(_) => true,
            ExprNode(ref expr) => expr.is_reducible(),
        }
    }
}
impl Evaluable for Expression {
fn is_reducible(&self) -> bool {
match *self {
Null => false,
StringLiteral(_) => false,
Lambda(_) => false,
Number(_) => false,
ListLiteral(ref items) => {
items.iter().any(|x| x.is_reducible())
}
StructLiteral(ref items) => {
items.iter().any(|pair| pair.1.is_reducible())
}
_ => true,
}
}
}
impl Expression {
    /// Returns false for null, the empty string and numbers equal to zero;
    /// every other expression counts as true.
    fn is_truthy(&self) -> bool {
        match *self {
            Null => false,
            StringLiteral(ref text) => !text.is_empty(),
            Number(n) => n != 0.0,
            _ => true,
        }
    }
}
fn is_assignment(op: &BinOp) -> bool {
use self::BinOp::*;
match *op {
Assign | AddAssign | SubAssign |
MulAssign | DivAssign => true,
_ => false,
}
}
impl<'a> Evaluator<'a> {
/// Applies `step` at least once and keeps applying it until the node is irreducible.
fn reduction_loop(&mut self, mut node: Statement) -> Statement {
    node = self.step(node);
    while node.is_reducible() {
        node = self.step(node);
    }
    node
}
/// Performs one reduction of `node`, carries out the side effect it produced (if any)
/// and returns the reduced node. When tracing is enabled a line describing the
/// step is printed.
fn step(&mut self, node: Statement) -> Statement {
    let mut trace = if self.trace_evaluation {
        format!("Step: {:?}", node)
    } else {
        String::new()
    };

    let (new_node, side_effect) = self.reduce_astnode(node);
    if self.trace_evaluation {
        trace += &format!("{:?}", new_node);
    }

    match side_effect {
        Some(effect) => {
            if self.trace_evaluation {
                trace += &format!(" | side-effect: {:?}", effect);
            }
            self.perform_side_effect(effect);
        }
        None => {}
    }

    if self.trace_evaluation {
        println!("{}", trace);
    }
    new_node
}
fn perform_side_effect(&mut self, side_effect: SideEffect) {
use self::SideEffect::*;
match side_effect {
Print(s) => {
write!(self.stdout, "{}\n", s).unwrap();
match self.stdout.flush() {
Ok(_) => (),
Err(_) => println!("Could not flush stdout"),
};
}
AddBinding(var, value) => {
self.add_binding((*var).clone(), value);
},
}
}
/// Reduces a statement by one step.
///
/// An expression statement is reduced through `reduce_expr` (or returned unchanged
/// when already irreducible). A function definition becomes a lambda expression
/// together with a side effect binding the function's name to it.
fn reduce_astnode(&mut self, node: Statement) -> Reduction<Statement> {
    let func = match node {
        ExprNode(expr) => {
            if !expr.is_reducible() {
                return (ExprNode(expr), None);
            }
            let (reduced, side_effect) = self.reduce_expr(expr);
            return (ExprNode(reduced), side_effect);
        }
        FuncDefNode(func) => func,
    };

    let binding = SideEffect::AddBinding(
        func.prototype.name.clone(),
        ReducedValue::Lambda(func.clone()),
    );
    (ExprNode(Expression::Lambda(func)), Some(binding))
}
//TODO I probably want another Expression variant that holds a ReducedValue
/// Performs a single small-step reduction of `expression`.
///
/// Returns the replacement expression (which may itself still be reducible) and at
/// most one side effect that the caller has to perform. Compound expressions reduce
/// one sub-expression per call and propagate the side effect that sub-reduction
/// produced.
fn reduce_expr(&mut self, expression: Expression) -> Reduction<Expression> {
    match expression {
        Null => (Null, None),
        e @ StringLiteral(_) => (e, None),
        e @ Number(_) => (e, None),
        e @ Lambda(_) => (e, None),
        // An unbound variable evaluates to null.
        Variable(ref var) => {
            match self.lookup_binding(var).map(|x| x.into()) {
                None => (Null, None),
                Some(expr) => (expr, None),
            }
        }
        BinExp(op, mut left, mut right) => {
            // The right operand is reduced to completion before anything else happens.
            if right.is_reducible() {
                let mut side_effect = None;
                take_mut::take(right.as_mut(), |expr| { let (a, b) = self.reduce_expr(expr); side_effect = b; a});
                return (BinExp(op, left, right), side_effect);
            }
            // Plain assignment: the left side is not evaluated, it only names the target.
            if let BinOp::Assign = op {
                return match *left {
                    Variable(var) => {
                        let reduced_value: ReducedValue = ReducedValue::from(*right);
                        let binding = SideEffect::AddBinding(var, reduced_value);
                        (Null, Some(binding))
                    },
                    _ => (Null, None)
                };
            }
            // Compound assignment `a op= b` is rewritten to `a = a op b`.
            if is_assignment(&op) {
                use self::BinOp::*;
                let new_op = match op {
                    AddAssign => Add,
                    SubAssign => Sub,
                    MulAssign => Mul,
                    DivAssign => Div,
                    _ => unreachable!(),
                };
                let reduction =
                    BinExp(BinOp::Assign,
                           Box::new(*left.clone()),
                           Box::new(BinExp(new_op, left, right))
                    );
                return (reduction, None);
            }
            if left.is_reducible() {
                let mut side_effect = None;
                take_mut::take(left.as_mut(), |expr| { let (a, b) = self.reduce_expr(expr); side_effect = b; a});
                (BinExp(op, left, right), side_effect)
            } else {
                (self.reduce_binop(op, *left, *right), None) //can assume both arguments are maximally reduced
            }
        }
        Call(callable, mut args) => {
            // Reduce the first reducible argument, one step per call. Its side effect
            // is returned to the caller like in every other arm; dropping it would
            // silently lose e.g. a `print(...)` used as an argument.
            let mut side_effect = None;
            let mut fully_reduced = true;
            for arg in args.iter_mut() {
                if arg.is_reducible() {
                    take_mut::take(arg, |arg| { let (a, b) = self.reduce_expr(arg); side_effect = b; a});
                    fully_reduced = false;
                    break;
                }
            }
            if fully_reduced {
                self.reduce_call(callable, args)
            } else {
                (Call(callable, args), side_effect)
            }
        }
        // `while t { body }` unrolls to `if t { body; while t { body } }`.
        While(test, body) => {
            let mut block = VecDeque::from(body.clone());
            block.push_back(While(test.clone(), body.clone()));
            let reduction = Conditional(test, Box::new(Block(block)), None);
            (reduction, None)
        }
        Conditional(box test, then_block, else_block) => {
            if test.is_reducible() {
                let (new_test, new_effect) = self.reduce_expr(test);
                (Conditional(Box::new(new_test), then_block, else_block), new_effect)
            } else {
                if test.is_truthy() {
                    (*then_block, None)
                } else {
                    match else_block {
                        Some(box expr) => (expr, None),
                        None => (Null, None),
                    }
                }
            }
        }
        Block(mut exprs) => {
            let first = exprs.pop_front();
            match first {
                None => (Null, None),
                Some(expr) => {
                    if exprs.len() == 0 {
                        // The last expression is the value of the block.
                        (expr, None)
                    } else {
                        if expr.is_reducible() {
                            let (new, side_effect) = self.reduce_expr(expr);
                            exprs.push_front(new);
                            (Block(exprs), side_effect)
                        } else {
                            // A finished non-final expression is discarded.
                            (Block(exprs), None)
                        }
                    }
                }
            }
        }
        Index(mut expr, mut index_expr) => {
            // The index is reduced before the expression being indexed.
            if index_expr.is_reducible() {
                let mut side_effect = None;
                take_mut::take(index_expr.as_mut(), |expr| { let (a, b) = self.reduce_expr(expr); side_effect = b; a});
                return (Index(expr, index_expr), side_effect)
            }
            if expr.is_reducible() {
                let mut side_effect = None;
                take_mut::take(expr.as_mut(), |expr| { let (a, b) = self.reduce_expr(expr); side_effect = b; a});
                return (Index(expr, index_expr), side_effect);
            }
            // Lists are indexed by number, structs by string; anything else yields null.
            match (*expr, *index_expr) {
                (ListLiteral(list_items), Number(n)) => {
                    let indexed_expr = get_indexer(n).and_then(|i| list_items.get(i));
                    if let Some(e) = indexed_expr {
                        (e.clone(), None)
                    } else {
                        (Null, None)
                    }
                }
                (StructLiteral(items), StringLiteral(s)) => {
                    for item in items {
                        if s == item.0 {
                            return (item.1.clone(), None); //TODO this is hella inefficient
                        }
                    }
                    (Null, None)
                },
                _ => (Null, None)
            }
        }
        ListLiteral(mut exprs) => {
            let mut side_effect = None;
            for expr in exprs.iter_mut() {
                if expr.is_reducible() {
                    take_mut::take(expr, |expr| {
                        let (a, b) = self.reduce_expr(expr);
                        side_effect = b;
                        a
                    });
                    break;
                }
            }
            (ListLiteral(exprs), side_effect)
        },
        StructLiteral(mut items) => {
            let mut side_effect = None;
            for pair in items.iter_mut() {
                if pair.1.is_reducible() {
                    take_mut::take(pair, |pair| {
                        let (name, expr) = pair;
                        let (a, b) = self.reduce_expr(expr);
                        side_effect = b;
                        (name, a)
                    });
                    break;
                }
            }
            (StructLiteral(items), side_effect)
        }
    }
}
/// Applies a non-assignment operator to two irreducible operands.
///
/// Comparisons yield `Number(1.0)` for true and `Null` for false. Any combination
/// of operator and operand types that is not listed (including division by zero)
/// yields `Null`.
fn reduce_binop(&mut self, op: BinOp, left: Expression, right: Expression) -> Expression {
    use self::BinOp::*;
    // Encodes a boolean result in the evaluator's own terms.
    let truth = |flag: bool| if flag { Number(1.0) } else { Null };
    match (op, left, right) {
        (Add, Number(l), Number(r)) => Number(l + r),
        (Add, StringLiteral(s1), StringLiteral(s2)) => StringLiteral(Rc::new(format!("{}{}", *s1, *s2))),
        (Add, StringLiteral(s1), Number(r)) => StringLiteral(Rc::new(format!("{}{}", *s1, r))),
        (Add, Number(l), StringLiteral(s1)) => StringLiteral(Rc::new(format!("{}{}", l, *s1))),
        (Sub, Number(l), Number(r)) => Number(l - r),
        (Mul, Number(l), Number(r)) => Number(l * r),
        (Div, Number(l), Number(r)) if r != 0.0 => Number(l / r),
        (Mod, Number(l), Number(r)) => Number(l % r),
        (Less, Number(l), Number(r)) => truth(l < r),
        (LessEq, Number(l), Number(r)) => truth(l <= r),
        (Greater, Number(l), Number(r)) => truth(l > r),
        (GreaterEq, Number(l), Number(r)) => truth(l >= r),
        (Equal, Number(l), Number(r)) => truth(l == r),
        (Equal, Null, Null) => truth(true),
        (Equal, StringLiteral(s1), StringLiteral(s2)) => truth(s1 == s2),
        _ => Null,
    }
}
/// Evaluates a call whose arguments are all irreducible.
///
/// Builtins are tried first. Otherwise the callee is resolved (a named function must
/// be bound to a lambda), the arguments are bound to the parameters in a child
/// evaluator, and the body is reduced statement by statement; the last result is
/// the call's value. An unresolvable callee or an argument-count mismatch yields null.
fn reduce_call(&mut self, callable: Callable, arguments: Vec<Expression>) -> Reduction<Expression> {
    if let Some(builtin_result) = handle_builtin(&callable, &arguments) {
        return builtin_result;
    }

    let function = match callable {
        Callable::Lambda(func) => func,
        Callable::NamedFunction(name) => match self.lookup_binding(&*name) {
            Some(ReducedValue::Lambda(func)) => func,
            _ => return (Null, None),
        },
    };

    let parameters = &function.prototype.parameters;
    if parameters.len() != arguments.len() {
        return (Null, None);
    }

    // The body runs in its own scope whose parent is the calling evaluator.
    let mut evaluator = Evaluator::new(Some(self));
    for (parameter, argument) in parameters.iter().zip(arguments.iter()) {
        evaluator.add_binding((**parameter).clone(), argument.clone().into());
    }

    let mut retval = ExprNode(Null);
    for statement in function.body.iter().cloned() {
        retval = evaluator.reduction_loop(statement);
    }

    match retval {
        ExprNode(expr) => (expr, None),
        FuncDefNode(_) => panic!("This should never happen! A maximally-reduced node\
                                  should never be a function definition!")
    }
}
}
/// Handles calls to built-in functions.
///
/// Returns `None` when `callable` is not a named function or names no builtin.
/// The only builtin is `print`: it evaluates to null and requests a `Print` side
/// effect whose text is every argument's display form, each followed by a space.
fn handle_builtin(callable: &Callable, arguments: &Vec<Expression>) -> Option<Reduction<Expression>> {
    let name: &str = match *callable {
        Callable::NamedFunction(ref name) => name.as_str(),
        _ => return None,
    };

    if name != "print" {
        return None;
    }

    let text: String = arguments.iter().map(|arg| format!("{} ", arg)).collect();
    Some((Null, Some(SideEffect::Print(text))))
}

106
maaru/src/lib.rs Normal file
View File

@@ -0,0 +1,106 @@
#![feature(box_patterns)]
extern crate schala_repl;
mod tokenizer;
mod parser;
mod eval;
mod compilation;
use schala_repl::{ProgrammingLanguageInterface, EvalOptions, LanguageOutput, TraceArtifact};
/// An error carrying a human-readable message.
// NOTE(review): presumably the error type returned by `tokenizer::tokenize`; confirm there.
#[derive(Debug)]
pub struct TokenError {
    /// Description of what went wrong.
    pub msg: String,
}

impl TokenError {
    /// Creates an error holding a copy of `msg`.
    pub fn new(msg: &str) -> TokenError {
        let owned = String::from(msg);
        TokenError { msg: owned }
    }
}
pub use self::eval::Evaluator as MaaruEvaluator;
/// The Maaru language as plugged into the REPL; wraps one evaluator.
pub struct Maaru<'a> {
    /// Evaluator that every REPL input is run through.
    evaluator: MaaruEvaluator<'a>
}

impl<'a> Maaru<'a> {
    /// Creates the language with a fresh top-level evaluator (no parent scope).
    pub fn new() -> Maaru<'a> {
        let evaluator = MaaruEvaluator::new(None);
        Maaru { evaluator: evaluator }
    }
}
impl<'a> ProgrammingLanguageInterface for Maaru<'a> {
    /// Name of the language.
    fn get_language_name(&self) -> String {
        String::from("Maaru")
    }

    /// File suffix of Maaru source files.
    fn get_source_file_suffix(&self) -> String {
        String::from("maaru")
    }

    /// Tokenizes, parses and evaluates `input`.
    ///
    /// A tokenization or parse error is reported in the output and stops processing.
    /// When requested through `options.debug`, the token list and the AST are attached
    /// as trace artifacts. The display forms of all evaluated statements are
    /// concatenated into a single output string.
    fn evaluate_in_repl(&mut self, input: &str, options: &EvalOptions) -> LanguageOutput {
        let mut output = LanguageOutput::default();

        let tokens = match tokenizer::tokenize(input) {
            Ok(tokens) => tokens,
            Err(err) => {
                output.add_output(format!("Tokenization error: {:?}\n", err.msg));
                return output;
            }
        };
        if options.debug.tokens {
            output.add_artifact(TraceArtifact::new("tokens", format!("{:?}", tokens)));
        }

        let ast = match parser::parse(&tokens, &[]) {
            Ok(ast) => ast,
            Err(err) => {
                output.add_output(format!("Parse error: {:?}\n", err.msg));
                return output;
            }
        };
        if options.debug.ast {
            output.add_artifact(TraceArtifact::new("ast", format!("{:?}", ast)));
        }

        let evaluation_output: String = self.evaluator.run(ast).concat();
        output.add_output(evaluation_output);
        output
    }

    /* TODO make this work with new framework */
    /*
    fn can_compile(&self) -> bool {
        true
    }
    fn compile(&mut self, input: &str) -> LLVMCodeString {
        let tokens = match tokenizer::tokenize(input) {
            Ok(tokens) => tokens,
            Err(err) => {
                let msg = format!("Tokenization error: {:?}\n", err.msg);
                panic!("{}", msg);
            }
        };
        let ast = match parser::parse(&tokens, &[]) {
            Ok(ast) => ast,
            Err(err) => {
                let msg = format!("Parse error: {:?}\n", err.msg);
                panic!("{}", msg);
            }
        };
        compilation::compile_ast(ast)
    }
    */
}

755
maaru/src/parser.rs Normal file
View File

@@ -0,0 +1,755 @@
use tokenizer::{Token, Kw, OpTok};
use tokenizer::Token::*;
use std::fmt;
use std::collections::VecDeque;
use std::rc::Rc;
use std::convert::From;
// Grammar
// program := (statement delimiter ?)*
// delimiter := Newline | Semicolon
// statement := declaration | expression
// declaration := FN prototype LCurlyBrace (statement)* RCurlyBrace
// prototype := identifier LParen identlist RParen
// identlist := Ident (Comma Ident)* | ε
// exprlist := Expression (Comma Expression)* | ε
// itemlist := Ident COLON Expression (Comma Ident COLON Expression)* | ε
//
// expression := postop_expression (op postop_expression)*
// postop_expression := primary_expression postop
// primary_expression := number_expr | String | identifier_expr | paren_expr | conditional_expr | while_expr | lambda_expr | list_expr | struct_expr
// number_expr := (PLUS | MINUS ) number_expr | Number
// identifier_expr := call_expression | Variable
// list_expr := LSquareBracket exprlist RSquareBracket
// struct_expr := LCurlyBrace itemlist RCurlyBrace
// call_expression := Identifier LParen exprlist RParen
// while_expr := WHILE primary_expression LCurlyBrace (expression delimiter)* RCurlyBrace
// paren_expr := LParen expression RParen
// conditional_expr := IF expression LCurlyBrace (expression delimiter)* RCurlyBrace (LCurlyBrace (expression delimiter)* RCurlyBrace)?
// lambda_expr := FN LParen identlist RParen LCurlyBrace (expression delimiter)* RCurlyBrace
// lambda_call := | LParen exprlist RParen
// postop := ε | LParen exprlist RParen | LBracket expression RBracket
// op := '+', '-', etc.
//
/// A parsed program: its statements in source order.
pub type AST = Vec<Statement>;
/// A statement: either an expression or a named function definition.
#[derive(Debug, Clone)]
pub enum Statement {
    /// An expression used as a statement.
    ExprNode(Expression),
    /// A function declaration (`fn` followed by a prototype and a body).
    FuncDefNode(Function),
}
impl fmt::Display for Statement {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Statement::*;
match *self {
ExprNode(ref expr) => write!(f, "{}", expr),
FuncDefNode(_) => write!(f, "UNIMPLEMENTED"),
}
}
}
/// A function: its prototype and the statements forming its body.
#[derive(Debug, Clone)]
pub struct Function {
    /// Name and parameter list.
    pub prototype: Prototype,
    /// Body statements in source order.
    pub body: Vec<Statement>,
}
/// A function's name together with the names of its parameters.
#[derive(Debug, Clone, PartialEq)]
pub struct Prototype {
    /// Function name.
    pub name: Rc<String>,
    /// Parameter names, in declaration order.
    pub parameters: Vec<Rc<String>>,
}
/// An expression of the language.
#[derive(Debug, Clone)]
pub enum Expression {
    /// The null value.
    Null,
    /// A string literal.
    StringLiteral(Rc<String>),
    /// A numeric literal.
    Number(f64),
    /// A reference to a variable by name.
    Variable(Rc<String>),
    /// A binary operation: operator, left operand, right operand.
    BinExp(BinOp, Box<Expression>, Box<Expression>),
    /// A call: the callee and its argument expressions.
    Call(Callable, Vec<Expression>),
    /// A conditional: test, then-branch, optional else-branch.
    Conditional(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
    /// A function value.
    Lambda(Function),
    /// A sequence of expressions.
    Block(VecDeque<Expression>),
    /// A loop: test and body expressions.
    While(Box<Expression>, Vec<Expression>),
    /// An indexing operation: the indexed expression and the index expression.
    Index(Box<Expression>, Box<Expression>),
    /// A list literal.
    ListLiteral(VecDeque<Expression>),
    /// A struct literal: (field name, value) pairs.
    StructLiteral(VecDeque<(Rc<String>, Expression)>),
}
/// The target of a call expression.
#[derive(Clone, Debug)]
pub enum Callable {
    /// A function referred to by name.
    NamedFunction(Rc<String>),
    /// A function value called directly.
    Lambda(Function),
}
//TODO this ought to be ReducedExpression
impl fmt::Display for Expression {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Expression::*;
match *self {
Null => write!(f, "null"),
StringLiteral(ref s) => write!(f, "\"{}\"", s),
Number(n) => write!(f, "{}", n),
Lambda(Function { prototype: Prototype { ref name, ref parameters, .. }, .. }) => {
write!(f, "«function: {}, {} arg(s)»", name, parameters.len())
}
ListLiteral(ref items) => {
write!(f, "[ ")?;
let mut iter = items.iter().peekable();
while let Some(item) = iter.next() {
write!(f, "{}", item)?;
if let Some(_) = iter.peek() {
write!(f, ", ")?;
}
}
write!(f, " ]")
}
StructLiteral(ref items) => {
write!(f, "{} ", "{")?;
let mut iter = items.iter().peekable();
while let Some(pair) = iter.next() {
write!(f, "{}: {}", pair.0, pair.1)?;
if let Some(_) = iter.peek() {
write!(f, ", ")?;
}
}
write!(f, "{} ", "}")
}
_ => write!(f, "UNIMPLEMENTED"),
}
}
}
/// Binary operators. The textual form of each is given by `From<OpTok>`.
#[derive(Debug, Clone)]
pub enum BinOp {
    /// `+`
    Add,
    /// `+=`
    AddAssign,
    /// `-`
    Sub,
    /// `-=`
    SubAssign,
    /// `*`
    Mul,
    /// `*=`
    MulAssign,
    /// `/`
    Div,
    /// `/=`
    DivAssign,
    /// `%`
    Mod,
    /// `<`
    Less,
    /// `<=`
    LessEq,
    /// `>`
    Greater,
    /// `>=`
    GreaterEq,
    /// `==`
    Equal,
    /// `=`
    Assign,
    /// Any operator text that is none of the above.
    Custom(String),
}
impl From<OpTok> for BinOp {
fn from(token: OpTok) -> BinOp {
use self::BinOp::*;
match &token.0[..] {
"+" => Add,
"+=" => AddAssign,
"-" => Sub,
"-=" => SubAssign,
"*" => Mul,
"*=" => MulAssign,
"/" => Div,
"/=" => DivAssign,
"%" => Mod,
"<" => Less,
"<=" => LessEq,
">" => Greater,
">=" => GreaterEq,
"==" => Equal,
"=" => Assign,
op => Custom(op.to_string()),
}
}
}
/// Binding strength of a binary operator; a larger value binds tighter.
type Precedence = u8;
// TODO make this support incomplete parses
/// Result type returned by every parsing routine.
pub type ParseResult<T> = Result<T, ParseError>;
/// A parse failure.
#[derive(Debug)]
pub struct ParseError {
    /// Description of what was expected and what was found.
    pub msg: String,
    /// Tokens not yet consumed when the error occurred; filled in by `Parser::program`.
    pub remaining_tokens: Vec<Token>,
}
impl ParseError {
fn result_from_str<T>(msg: &str) -> ParseResult<T> {
Err(ParseError {
msg: msg.to_string(),
remaining_tokens: vec![],
})
}
}
/// Recursive-descent parser over a token list.
struct Parser {
    /// Unconsumed tokens in reverse order, so the next token is the last element.
    tokens: Vec<Token>,
}
impl Parser {
    /// Creates a parser over a copy of `tokens`, stored reversed so that the next
    /// token can be popped off the end.
    fn initialize(tokens: &[Token]) -> Parser {
        let reversed: Vec<Token> = tokens.iter().rev().cloned().collect();
        Parser { tokens: reversed }
    }

    /// Returns a copy of the next token without consuming it.
    fn peek(&self) -> Option<Token> {
        self.tokens.last().cloned()
    }

    /// Consumes and returns the next token.
    fn next(&mut self) -> Option<Token> {
        self.tokens.pop()
    }

    /// Returns the precedence of a binary operator; unknown operators get 255.
    fn get_precedence(&self, op: &OpTok) -> Precedence {
        match &op.0[..] {
            "=" | "+=" | "-=" | "*=" | "/=" => 1,
            "+" | "-" => 10,
            "*" | "/" | "%" => 20,
            ">" | ">=" | "<" | "<=" => 30,
            "==" => 40,
            _ => 255,
        }
    }
}
// Consumes the next token if it matches `$token`. Otherwise returns a `ParseError`
// from the enclosing function, naming the expected pattern and what was found
// (or that the input ended).
macro_rules! expect {
    ($self_:expr, $token:pat) => {
        match $self_.peek() {
            Some($token) => {$self_.next();},
            Some(x) => {
                let err = format!("Expected `{:?}` but got `{:?}`", stringify!($token), x);
                return ParseError::result_from_str(&err)
            },
            None => {
                let err = format!("Expected `{:?}` but got end of input", stringify!($token));
                return ParseError::result_from_str(&err) //TODO make this not require 2 stringifications
            }
        }
    }
}
// Consumes the next token if it is an identifier and evaluates to its name.
// Otherwise returns a `ParseError` from the enclosing function.
macro_rules! expect_identifier {
    ($self_:expr) => {
        match $self_.peek() {
            Some(Identifier(s)) => {$self_.next(); s},
            Some(x) => return ParseError::result_from_str(&format!("Expected identifier, but got {:?}", x)),
            None => return ParseError::result_from_str("Expected identifier, but got end of input"),
        }
    }
}
// Consumes consecutive delimiter tokens (see `is_delimiter`) and stops at the
// first token that is not a delimiter, or at the end of input.
macro_rules! skip_whitespace {
    ($_self: expr) => {
        loop {
            match $_self.peek() {
                Some(ref t) if is_delimiter(t) => {
                    $_self.next();
                    continue;
                }
                _ => break,
            }
        }
    }
}
// Parses a delimiter-separated sequence and evaluates to a `Vec` of the results.
// The parser method `$try_parse` is called repeatedly; delimiter tokens between
// items are skipped. Parsing stops, without consuming the token, at the end of
// input or when the next token matches one of the break patterns. A failure of
// `$try_parse` is returned from the enclosing function via `try!`.
macro_rules! delimiter_block {
    ($_self: expr, $try_parse: ident, $($break_pattern: pat)|+) => {
        {
            let mut acc = Vec::new();
            loop {
                match $_self.peek() {
                    None => break,
                    Some(ref t) if is_delimiter(t) => { $_self.next(); continue; },
                    $($break_pattern)|+ => break,
                    _ => {
                        let a = try!($_self.$try_parse());
                        acc.push(a);
                    }
                }
            }
            acc
        }
    }
}
fn is_delimiter(token: &Token) -> bool {
match *token {
Newline | Semicolon => true,
_ => false,
}
}
impl Parser {
/// Parses a whole program: statements separated by delimiters.
///
/// On failure the error is returned with `remaining_tokens` set to the tokens
/// that had not been consumed, in source order.
fn program(&mut self) -> ParseResult<AST> {
    let mut ast = Vec::new(); //TODO have this come from previously-parsed tree
    while let Some(token) = self.peek() {
        if is_delimiter(&token) {
            self.next();
            continue;
        }
        match self.statement() {
            Ok(node) => ast.push(node),
            Err(mut err) => {
                // `self.tokens` is stored reversed; flip it back to source order.
                err.remaining_tokens = self.tokens.iter().rev().cloned().collect();
                return Err(err);
            }
        }
    }
    Ok(ast)
}
/// Parses one statement: a declaration when the next token is `fn`, otherwise an
/// expression.
///
/// Panics when there are no tokens left.
fn statement(&mut self) -> ParseResult<Statement> {
    match self.peek() {
        Some(Keyword(Kw::Fn)) => self.declaration(),
        Some(_) => self.expression().map(Statement::ExprNode),
        None => panic!("Unexpected end of tokens"),
    }
}
/// Parses `fn prototype { body }` into a function definition statement.
fn declaration(&mut self) -> ParseResult<Statement> {
    expect!(self, Keyword(Kw::Fn));
    let prototype = self.prototype()?;

    expect!(self, LCurlyBrace);
    let body = self.body()?;
    expect!(self, RCurlyBrace);

    let function = Function { prototype, body };
    Ok(Statement::FuncDefNode(function))
}
/// Parses `name ( identlist )` into a prototype.
fn prototype(&mut self) -> ParseResult<Prototype> {
    let name = expect_identifier!(self);

    expect!(self, LParen);
    let parameters = self.identlist()?;
    expect!(self, RParen);

    Ok(Prototype { name, parameters })
}
/// Parses a possibly empty, comma-separated list of identifiers. Parsing stops
/// at the first token that is not an identifier, or after an identifier that is
/// not followed by a comma.
fn identlist(&mut self) -> ParseResult<Vec<Rc<String>>> {
    let mut names = Vec::new();
    while let Some(Identifier(name)) = self.peek() {
        names.push(name);
        self.next();
        if let Some(Comma) = self.peek() {
            self.next();
        } else {
            break;
        }
    }
    Ok(names)
}
/// Parses a possibly empty, comma-separated list of expressions.
///
/// The list ends, without the closing token being consumed, at `)` or `]`, or
/// after an expression that is not followed by a comma. Recognising `]` as well
/// as `)` is what allows the empty list literal `[]` (and a trailing comma before
/// `]`) to parse, since list literals are parsed through this method too.
fn exprlist(&mut self) -> ParseResult<Vec<Expression>> {
    let mut exprs = Vec::new();
    loop {
        match self.peek() {
            Some(RParen) => break,
            Some(RSquareBracket) => break,
            _ => {}
        }
        let exp = self.expression()?;
        exprs.push(exp);
        match self.peek() {
            Some(Comma) => {self.next();},
            _ => break,
        }
    }
    Ok(exprs)
}
/// Parses a possibly empty, comma-separated list of `name: expression` items.
/// The list ends, without the token being consumed, at `}` or after an item that
/// is not followed by a comma.
fn itemlist(&mut self) -> ParseResult<VecDeque<(Rc<String>, Expression)>> {
    let mut items = VecDeque::new();
    loop {
        match self.peek() {
            Some(RCurlyBrace) => break,
            _ => {}
        }

        let name = expect_identifier!(self);
        expect!(self, Colon);
        let value = self.expression()?;
        items.push_back((name, value));

        if let Some(Comma) = self.peek() {
            self.next();
        } else {
            break;
        }
    }
    Ok(items)
}
/// Parses the statements of a function body up to, but not including, the closing `}`.
fn body(&mut self) -> ParseResult<Vec<Statement>> {
    Ok(delimiter_block!(self, statement, Some(RCurlyBrace)))
}
/// Parses a full expression: a first operand followed by any number of
/// operator/operand pairs, combined according to operator precedence.
fn expression(&mut self) -> ParseResult<Expression> {
    let first_operand = self.postop_expression()?;
    let lowest_precedence = 0;
    self.precedence_expr(first_operand, lowest_precedence)
}
/// Precedence-climbing loop for binary operators.
///
/// `lhs` is the operand parsed so far. Operators whose precedence is at least
/// `min_precedence` are consumed; for each one the right operand is parsed and, while
/// the operator after it binds tighter, extended by a recursive call. Returns the
/// combined expression once the next token is not an operator or binds too loosely.
fn precedence_expr(&mut self,
                   mut lhs: Expression,
                   min_precedence: u8)
                   -> ParseResult<Expression> {
    while let Some(Operator(op)) = self.peek() {
        let precedence = self.get_precedence(&op);
        if precedence < min_precedence {
            break;
        }
        self.next();
        let mut rhs = self.postop_expression()?;
        // Let tighter-binding operators that follow claim `rhs` as their left operand first.
        while let Some(Operator(ref op)) = self.peek() {
            if self.get_precedence(op) > precedence {
                let new_prec = self.get_precedence(op);
                rhs = self.precedence_expr(rhs, new_prec)?;
            } else {
                break;
            }
        }
        lhs = Expression::BinExp(op.into(), Box::new(lhs), Box::new(rhs));
    }
    Ok(lhs)
}
fn postop_expression(&mut self) -> ParseResult<Expression> {
use self::Expression::*;
let expr = self.primary_expression()?;
let ret = match self.peek() {
Some(LParen) => {
let args = self.call_expression()?;
match expr {
Lambda(f) => Call(Callable::Lambda(f), args),
e => {
let err = format!("Expected lambda expression before a call, got {:?}", e);
return ParseError::result_from_str(&err);
},
}
},
Some(LSquareBracket) => {
expect!(self, LSquareBracket);
let index_expr = self.expression()?;
expect!(self, RSquareBracket);
Index(Box::new(expr), Box::new(index_expr))
},
_ => {
expr
}
};
Ok(ret)
}
/// Parses the smallest self-contained expression, dispatching on the next
/// token: `null`, a (possibly signed) number, a string literal, `if`,
/// `while`, an identifier, a parenthesised expression, a lambda, a list
/// literal or a struct literal.
///
/// Any other token, or end of input, is reported as a parse error.
fn primary_expression(&mut self) -> ParseResult<Expression> {
    let parsed = match self.peek() {
        None => return ParseError::result_from_str("Expected primary expression received EoI"),
        Some(Keyword(Kw::Null)) => {
            self.next();
            Expression::Null
        }
        Some(StrLiteral(text)) => {
            self.next();
            Expression::StringLiteral(text)
        }
        Some(NumLiteral(_)) => self.number_expression()?,
        // A leading sign is handed to the number parser as well.
        Some(Operator(OpTok(ref sign))) if **sign == "+" || **sign == "-" => {
            self.number_expression()?
        }
        Some(Keyword(Kw::If)) => self.conditional_expr()?,
        Some(Keyword(Kw::While)) => self.while_expr()?,
        Some(Keyword(Kw::Fn)) => self.lambda_expr()?,
        Some(Identifier(_)) => self.identifier_expr()?,
        Some(Token::LParen) => self.paren_expr()?,
        Some(Token::LSquareBracket) => self.list_expr()?,
        Some(Token::LCurlyBrace) => self.struct_expr()?,
        Some(unexpected) => {
            let message = format!("Expected primary expression, got {:?}", unexpected);
            return ParseError::result_from_str(&message);
        }
    };
    Ok(parsed)
}
fn list_expr(&mut self) -> ParseResult<Expression> {
expect!(self, LSquareBracket);
let exprlist: Vec<Expression> = self.exprlist()?;
expect!(self, RSquareBracket);
Ok(Expression::ListLiteral(VecDeque::from(exprlist)))
}
/// Parses a struct literal: `{`, a list of `name: expression` items
/// (see `itemlist`), `}`.
fn struct_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, LCurlyBrace);
    let fields = self.itemlist()?;
    expect!(self, RCurlyBrace);
    let literal = Expression::StructLiteral(fields);
    Ok(literal)
}
/// Parses a numeric literal preceded by any number of unary `+` / `-`
/// operator tokens.
///
/// Each `-` flips the sign of the result; `+` is consumed and ignored.
/// Any other token, or end of input, before the number is a parse error.
fn number_expression(&mut self) -> ParseResult<Expression> {
    // Always exactly 1.0 or -1.0, so the multiplication below only ever
    // changes the sign of the literal.
    let mut sign = 1.0_f64;
    loop {
        let token = match self.peek() {
            Some(token) => token,
            None => {
                return ParseError::result_from_str("Expected +, - or number, got EoI");
            }
        };
        match token {
            NumLiteral(value) => {
                self.next();
                return Ok(Expression::Number(value * sign));
            }
            Operator(OpTok(ref symbol)) if **symbol == "+" => {
                self.next();
            }
            Operator(OpTok(ref symbol)) if **symbol == "-" => {
                sign = -sign;
                self.next();
            }
            other => {
                let message = format!("Expected +, - or number, got {:?}", other);
                return ParseError::result_from_str(&message);
            }
        }
    }
}
fn lambda_expr(&mut self) -> ParseResult<Expression> {
use self::Expression::*;
expect!(self, Keyword(Kw::Fn));
skip_whitespace!(self);
expect!(self, LParen);
let parameters = self.identlist()?;
expect!(self, RParen);
skip_whitespace!(self);
expect!(self, LCurlyBrace);
let body = self.body()?;
expect!(self, RCurlyBrace);
let prototype = Prototype {
name: Rc::new("a lambda yo!".to_string()),
parameters: parameters,
};
let function = Function {
prototype: prototype,
body: body,
};
Ok(Lambda(function))
}
fn while_expr(&mut self) -> ParseResult<Expression> {
use self::Expression::*;
expect!(self, Keyword(Kw::While));
let test = self.expression()?;
expect!(self, LCurlyBrace);
let body = delimiter_block!(
self,
expression,
Some(RCurlyBrace)
);
expect!(self, RCurlyBrace);
Ok(While(Box::new(test), body))
}
/// Parses a conditional: `if <test> { <exprs> }` optionally followed by
/// `else { <exprs> }`.
///
/// Both blocks are parsed as sequences of expressions and wrapped in
/// `Expression::Block`. The else block is `None` when no `else` keyword
/// follows the then-block.
///
/// Returns a parse error if any expected keyword or brace is missing, or
/// if the test or any block expression fails to parse.
fn conditional_expr(&mut self) -> ParseResult<Expression> {
    use self::Expression::*;
    expect!(self, Keyword(Kw::If));
    let test = self.expression()?;
    skip_whitespace!(self);
    expect!(self, LCurlyBrace);
    skip_whitespace!(self);
    let then_block = delimiter_block!(
        self,
        expression,
        Some(RCurlyBrace)
    );
    expect!(self, RCurlyBrace);
    skip_whitespace!(self);
    let else_block = if let Some(Keyword(Kw::Else)) = self.peek() {
        self.next();
        skip_whitespace!(self);
        expect!(self, LCurlyBrace);
        let else_exprs = delimiter_block!(
            self,
            expression,
            Some(RCurlyBrace)
        );
        // The closing brace of the else block must be consumed here, and
        // only here: an `if` without `else` has already consumed its single
        // closing brace above, so requiring another one afterwards would
        // reject it (or swallow the brace of an enclosing block).
        expect!(self, RCurlyBrace);
        Some(else_exprs)
    } else {
        None
    };
    Ok(Conditional(Box::new(test),
                   Box::new(Block(VecDeque::from(then_block))),
                   else_block.map(|list| Box::new(Block(VecDeque::from(list))))))
}
/// Parses an expression that starts with an identifier: a call to a named
/// function when the identifier is immediately followed by `(`, otherwise
/// a plain variable reference.
fn identifier_expr(&mut self) -> ParseResult<Expression> {
    let name = expect_identifier!(self);
    if let Some(LParen) = self.peek() {
        let arguments = self.call_expression()?;
        return Ok(Expression::Call(Callable::NamedFunction(name), arguments));
    }
    Ok(Expression::Variable(name))
}
/// Parses a parenthesised argument list `( expr, expr, ... )` and returns
/// the argument expressions in source order.
fn call_expression(&mut self) -> ParseResult<Vec<Expression>> {
    expect!(self, LParen);
    let arguments = self.exprlist()?;
    expect!(self, RParen);
    Ok(arguments)
}
/// Parses `( expression )` and returns the inner expression unchanged;
/// no wrapper node is produced for the parentheses.
fn paren_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, Token::LParen);
    let inner = self.expression()?;
    expect!(self, Token::RParen);
    Ok(inner)
}
}
/// Entry point of the parser: builds a `Parser` over `tokens` and parses a
/// whole program into an `AST`.
///
/// `_parsed_tree` is accepted for interface compatibility but is not used.
pub fn parse(tokens: &[Token], _parsed_tree: &[Statement]) -> ParseResult<AST> {
    Parser::initialize(tokens).program()
}
/*
#[cfg(test)]
mod tests {
use schala_lang::tokenizer;
use super::*;
use super::Statement::*;
use super::Expression::*;
macro_rules! parsetest {
($input:expr, $output:pat, $ifexpr:expr) => {
{
let tokens = tokenizer::tokenize($input).unwrap();
let ast = parse(&tokens, &[]).unwrap();
match &ast[..] {
$output if $ifexpr => (),
x => panic!("Error in parse test, got {:?} instead", x)
}
}
}
}
#[test]
fn function_parse_test() {
use super::Function;
parsetest!(
"fn a() { 1 + 2 }",
&[FuncDefNode(Function {prototype: Prototype { ref name, ref parameters }, ref body})],
match &body[..] { &[ExprNode(BinExp(_, box Number(1.0), box Number(2.0)))] => true, _ => false }
&& **name == "a" && match &parameters[..] { &[] => true, _ => false }
);
parsetest!(
"fn a(x,y){ 1 + 2 }",
&[FuncDefNode(Function {prototype: Prototype { ref name, ref parameters }, ref body})],
match &body[..] { &[ExprNode(BinExp(_, box Number(1.0), box Number(2.0)))] => true, _ => false }
&& **name == "a" && *parameters[0] == "x" && *parameters[1] == "y" && parameters.len() == 2
);
let t3 = "fn (x) { x + 2 }";
let tokens3 = tokenizer::tokenize(t3).unwrap();
assert!(parse(&tokens3, &[]).is_err());
}
#[test]
fn expression_parse_test() {
parsetest!("a", &[ExprNode(Variable(ref s))], **s == "a");
parsetest!("a + b",
&[ExprNode(BinExp(BinOp::Add, box Variable(ref a), box Variable(ref b)))],
**a == "a" && **b == "b");
parsetest!("a + b * c",
&[ExprNode(BinExp(BinOp::Add, box Variable(ref a), box BinExp(BinOp::Mul, box Variable(ref b), box Variable(ref c))))],
**a == "a" && **b == "b" && **c == "c");
parsetest!("a * b + c",
&[ExprNode(BinExp(BinOp::Add, box BinExp(BinOp::Mul, box Variable(ref a), box Variable(ref b)), box Variable(ref c)))],
**a == "a" && **b == "b" && **c == "c");
parsetest!("(a + b) * c",
&[ExprNode(BinExp(BinOp::Mul, box BinExp(BinOp::Add, box Variable(ref a), box Variable(ref b)), box Variable(ref c)))],
**a == "a" && **b == "b" && **c == "c");
}
#[test]
fn lambda_parse_test() {
use schala_lang::tokenizer;
let t1 = "(fn(x) { x + 2 })";
let tokens1 = tokenizer::tokenize(t1).unwrap();
match parse(&tokens1, &[]).unwrap()[..] {
_ => (),
}
let t2 = "fn(x) { x + 2 }";
let tokens2 = tokenizer::tokenize(t2).unwrap();
assert!(parse(&tokens2, &[]).is_err());
let t3 = "(fn(x) { x + 10 })(20)";
let tokens3 = tokenizer::tokenize(t3).unwrap();
match parse(&tokens3, &[]).unwrap() {
_ => (),
};
}
#[test]
fn conditional_parse_test() {
use schala_lang::tokenizer;
let t1 = "if null { 20 } else { 40 }";
let tokens = tokenizer::tokenize(t1).unwrap();
match parse(&tokens, &[]).unwrap()[..] {
[ExprNode(Conditional(box Null, box Block(_), Some(box Block(_))))] => (),
_ => panic!(),
}
let t2 = r"
if null {
20
} else {
40
}
";
let tokens2 = tokenizer::tokenize(t2).unwrap();
match parse(&tokens2, &[]).unwrap()[..] {
[ExprNode(Conditional(box Null, box Block(_), Some(box Block(_))))] => (),
_ => panic!(),
}
let t2 = r"
if null {
20 } else
{
40
}
";
let tokens3 = tokenizer::tokenize(t2).unwrap();
match parse(&tokens3, &[]).unwrap()[..] {
[ExprNode(Conditional(box Null, box Block(_), Some(box Block(_))))] => (),
_ => panic!(),
}
}
}
*/

208
maaru/src/tokenizer.rs Normal file
View File

@@ -0,0 +1,208 @@
extern crate itertools;
use std::iter::Peekable;
use std::str::Chars;
use self::itertools::Itertools;
use std::rc::Rc;
use TokenError;
/// A lexical token produced by the tokenizer in this file.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
/// A `'\n'` character. Other whitespace produces no token.
Newline,
/// `;`
Semicolon,
/// `(`
LParen,
/// `)`
RParen,
/// `[`
LSquareBracket,
/// `]`
RSquareBracket,
/// `{`
LCurlyBrace,
/// `}`
RCurlyBrace,
/// `,`
Comma,
/// Returned by `tokenize_number_or_period` for a `.` that is not followed
/// by a digit.
Period,
/// `:`
Colon,
/// A numeric literal: a run of digits and `.` characters parsed as `f64`.
NumLiteral(f64),
/// The text between a pair of double quotes, with no escape processing.
StrLiteral(Rc<String>),
/// A word that is not one of the keywords in `Kw`.
Identifier(Rc<String>),
/// A run of operator characters (see `tokenize_operator`).
Operator(OpTok),
/// A reserved word.
Keyword(Kw),
}
/// The text of an operator token, e.g. `+` or `*>`, exactly as it appeared
/// in the input.
#[derive(Debug, Clone, PartialEq)]
pub struct OpTok(pub Rc<String>);
/// The reserved words recognised by `tokenize_identifier`.
#[derive(Debug, Clone, PartialEq)]
pub enum Kw {
/// `if`
If,
/// `else`
Else,
/// `while`
While,
/// `let`
Let,
/// `fn`
Fn,
/// `null`
Null,
}
/// Result of tokenizing a whole input: the token list on success, or the
/// first `TokenError` encountered.
pub type TokenizeResult = Result<Vec<Token>, TokenError>;
/// Returns `true` when `c` is a decimal digit (`0`-`9`).
///
/// Takes a reference so it can be passed directly as a predicate to
/// adaptors that hand out `&char` (e.g. `Option<&char>::map_or`).
fn is_digit(c: &char) -> bool {
    c.to_digit(10).is_some()
}
/// Splits `input` into a flat list of tokens.
///
/// * `#` starts a comment that runs to the end of the line; the terminating
///   newline is consumed together with the comment and produces no token.
/// * `'\n'` produces `Newline`; all other whitespace is skipped.
/// * Single-character punctuation maps directly to its token.
/// * `"` starts a string literal.
/// * Any other non-alphanumeric character starts an operator.
/// * A digit starts a number; anything else starts an identifier/keyword.
///
/// Returns the first error reported by one of the sub-tokenizers.
pub fn tokenize(input: &str) -> TokenizeResult {
    use self::Token::*;
    let mut tokens = Vec::new();
    let mut iter: Peekable<Chars> = input.chars().peekable();
    loop {
        let c = match iter.next() {
            Some(c) => c,
            None => break,
        };
        if c == '#' {
            // Discard everything up to and including the next newline.
            let _ = iter.find(|&ch| ch == '\n');
            continue;
        }
        let cur_tok = match c {
            '\n' => Newline,
            blank if char::is_whitespace(blank) => continue,
            ';' => Semicolon,
            '(' => LParen,
            ')' => RParen,
            ':' => Colon,
            ',' => Comma,
            '{' => LCurlyBrace,
            '}' => RCurlyBrace,
            '[' => LSquareBracket,
            ']' => RSquareBracket,
            '"' => tokenize_str(&mut iter)?,
            symbol if !char::is_alphanumeric(symbol) => tokenize_operator(symbol, &mut iter)?,
            // NOTE(review): '.' is not alphanumeric, so it is always taken by
            // the operator arm above and never reaches the number/period
            // tokenizer from here. Only digits can arrive at this arm.
            digit if is_digit(&digit) => tokenize_number_or_period(digit, &mut iter)?,
            start => tokenize_identifier(start, &mut iter)?,
        };
        tokens.push(cur_tok);
    }
    Ok(tokens)
}
/// Reads the remainder of a string literal; the opening quote has already
/// been consumed by the caller.
///
/// Characters are copied verbatim up to the closing `"`, which is consumed
/// but not included. Reaching end of input first is an "Unclosed quote"
/// error.
fn tokenize_str(iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    let mut contents = String::new();
    // TODO handle string escapes, interpolation
    for ch in iter.by_ref() {
        if ch == '"' {
            return Ok(Token::StrLiteral(Rc::new(contents)));
        }
        contents.push(ch);
    }
    Err(TokenError::new("Unclosed quote"))
}
/// Builds an operator token starting with `c` and extending over every
/// following character that is neither alphanumeric nor whitespace.
///
/// The first character that does not qualify is left in `iter`.
fn tokenize_operator(c: char, iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    let mut symbol = c.to_string();
    symbol.extend(iter.peeking_take_while(|ch| !(ch.is_alphanumeric() || ch.is_whitespace())));
    Ok(Token::Operator(OpTok(Rc::new(symbol))))
}
/// Tokenizes either a lone `.` or a numeric literal beginning with `c`.
///
/// A `.` that is not directly followed by a digit becomes `Token::Period`.
/// Otherwise `c` plus every following digit or `.` is collected and parsed
/// as an `f64`; text that does not parse (e.g. `2.4.5`) is reported as a
/// "Failed to parse digit" error.
fn tokenize_number_or_period(c: char, iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    if c == '.' {
        let digit_follows = match iter.peek() {
            Some(next) => is_digit(next),
            None => false,
        };
        if !digit_follows {
            return Ok(Token::Period);
        }
    }
    let mut literal = c.to_string();
    literal.extend(iter.peeking_take_while(|ch| *ch == '.' || is_digit(ch)));
    literal
        .parse::<f64>()
        .map(Token::NumLiteral)
        .map_err(|_| TokenError::new("Failed to parse digit"))
}
/// Tokenizes a word starting with `c` and classifies it as a keyword or an
/// identifier.
///
/// The word extends until whitespace, a digit, or one of
/// `; ( ) , . : [ ]`. Note that other symbols (operator characters, braces)
/// do not end the word, so `a+` is read as a single identifier.
fn tokenize_identifier(c: char, iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    /// True for characters that terminate an identifier.
    fn ends_identifier(c: &char) -> bool {
        match *c {
            ';' | '(' | ')' | ',' | '.' | ':' | '[' | ']' => true,
            other => char::is_whitespace(other) || is_digit(&other),
        }
    }
    use self::Token::*;
    let mut word = c.to_string();
    word.extend(iter.peeking_take_while(|ch| !ends_identifier(ch)));
    let keyword = match word.as_str() {
        "if" => Kw::If,
        "else" => Kw::Else,
        "while" => Kw::While,
        "let" => Kw::Let,
        "fn" => Kw::Fn,
        "null" => Kw::Null,
        // Anything that is not a reserved word is an ordinary identifier.
        other => return Ok(Identifier(Rc::new(other.to_owned()))),
    };
    Ok(Keyword(keyword))
}
/*
#[cfg(test)]
mod tests {
use super::*;
use super::Token::*;
macro_rules! token_test {
($input: expr, $output: pat, $ifexpr: expr) => {
let tokens = tokenize($input).unwrap();
match tokens[..] {
$output if $ifexpr => (),
_ => panic!("Actual output: {:?}", tokens),
}
}
}
#[test]
fn basic_tokeniziation_tests() {
token_test!("let a = 3\n",
[Keyword(Kw::Let), Identifier(ref a), Operator(OpTok(ref b)), NumLiteral(3.0), Newline],
**a == "a" && **b == "=");
token_test!("2+1",
[NumLiteral(2.0), Operator(OpTok(ref a)), NumLiteral(1.0)],
**a == "+");
token_test!("2 + 1",
[NumLiteral(2.0), Operator(OpTok(ref a)), NumLiteral(1.0)],
**a == "+");
token_test!("2.3*49.2",
[NumLiteral(2.3), Operator(OpTok(ref a)), NumLiteral(49.2)],
**a == "*");
token_test!("a+3",
[Identifier(ref a), NumLiteral(3.0)],
**a == "a+");
assert!(tokenize("2.4.5").is_err());
token_test!("fn my_func(a) { a ? 3[1] }",
[Keyword(Kw::Fn), Identifier(ref a), LParen, Identifier(ref b), RParen, LCurlyBrace, Identifier(ref c),
Operator(OpTok(ref d)), NumLiteral(3.0), LSquareBracket, NumLiteral(1.0), RSquareBracket, RCurlyBrace],
**a == "my_func" && **b == "a" && **c == "a" && **d == "?");
}
#[test]
fn string_test() {
token_test!("null + \"a string\"",
[Keyword(Kw::Null), Operator(OpTok(ref a)), StrLiteral(ref b)],
**a == "+" && **b == "a string");
token_test!("\"{?'q@?\"",
[StrLiteral(ref a)],
**a == "{?'q@?");
}
#[test]
fn operator_test() {
token_test!("a *> b",
[Identifier(ref a), Operator(OpTok(ref b)), Identifier(ref c)],
**a == "a" && **b == "*>" && **c == "b");
}
}
*/