schala/schala-lang/language/src/symbol_table.rs
2019-08-12 11:27:16 -07:00

408 lines
12 KiB
Rust

use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::rc::Rc;
use std::fmt;
use std::fmt::Write;
use crate::ast;
use crate::ast::{Meta, TypeBody, TypeSingletonName, Signature, Statement};
use crate::typechecking::TypeName;
/// Source line number, used as the value type of `SymbolTrackTable`.
type LineNumber = u32;
/// Per-scope record of identifiers that have already been declared, mapped to the
/// line number stored for them. It is used for duplicate detection in
/// `add_symbols_from_scope`. NOTE: the only value currently inserted is the
/// placeholder `0` (see the TODOs there).
type SymbolTrackTable = HashMap<Rc<String>, LineNumber>;
/// Key type of the symbol table: the names of the enclosing scopes (outermost first)
/// followed by the symbol's own name as the last element.
#[derive(PartialEq, Eq, Hash, Debug)]
struct PathToSymbol(Vec<Rc<String>>);
/// One level of lexical nesting on the way to a symbol. A stack of these is kept
/// while walking the AST and copied into every `Symbol` that gets recorded.
#[derive(Debug, Clone)]
struct ScopeSegment {
/// Name of the item that opened this scope (the function's name for function scopes).
scope_name: Rc<String>,
/// What kind of item opened this scope.
scope_type: ScopeSegmentKind,
}
/// The kind of item that introduced a `ScopeSegment`.
#[derive(Debug, Clone)]
enum ScopeSegmentKind {
/// Scope opened by the body of a function declaration.
Function,
// A `Type` kind is sketched here but disabled: type declarations do not currently
// push a scope segment (see the TODO in `add_type_decl`).
//Type,
}
// cf. p. 150 or so of Language Implementation Patterns
/// Flat table of every symbol discovered in a program, keyed by the symbol's fully
/// qualified path (enclosing scope names followed by the symbol name).
pub struct SymbolTable {
    /// Fully qualified path -> symbol record.
    values: HashMap<PathToSymbol, Symbol>,
}

// TODO add various types of lookups here, maybe multiple hash tables internally?
impl SymbolTable {
    /// Creates a table that contains no symbols.
    pub fn new() -> SymbolTable {
        let values = HashMap::new();
        SymbolTable { values }
    }

    /// Records `spec` under `name`, declared inside the scopes listed in `scope_path`
    /// (outermost first). A previously stored symbol with the same path is replaced.
    fn add_new_symbol(&mut self, name: &Rc<String>, scope_path: &Vec<ScopeSegment>, spec: SymbolSpec) {
        // The key is every enclosing scope name followed by the symbol's own name.
        let mut segments: Vec<Rc<String>> = Vec::with_capacity(scope_path.len() + 1);
        for scope in scope_path {
            segments.push(Rc::clone(&scope.scope_name));
        }
        segments.push(Rc::clone(name));

        let record = Symbol {
            name: Rc::clone(name),
            scopes: scope_path.clone(),
            spec,
        };
        self.values.insert(PathToSymbol(segments), record);
    }

    /// Looks up `name` with an empty scope path, i.e. among the symbols that were
    /// recorded without any enclosing scope.
    pub fn lookup_by_name(&self, name: &Rc<String>) -> Option<&Symbol> {
        let no_enclosing_scopes = Vec::new();
        self.lookup_by_path(name, &no_enclosing_scopes)
    }

    /// Looks up `name` declared directly inside the scopes named by `path`
    /// (outermost first). Returns `None` when no symbol was recorded under exactly
    /// that path; enclosing scopes are not searched.
    pub fn lookup_by_path(&self, name: &Rc<String>, path: &Vec<Rc<String>>) -> Option<&Symbol> {
        let key = PathToSymbol(path.iter().chain(std::iter::once(name)).cloned().collect());
        self.values.get(&key)
    }
}
/// A single entry of the symbol table.
#[derive(Debug)]
pub struct Symbol {
    /// The symbol's own (unqualified) name.
    pub name: Rc<String>, //TODO does this need to be pub?
    /// Copy of the scope stack that was active when the symbol was recorded,
    /// outermost scope first.
    scopes: Vec<ScopeSegment>,
    /// What kind of entity the name refers to.
    pub spec: SymbolSpec,
}

impl fmt::Display for Symbol {
    /// Renders the symbol as `<Name: NAME, Spec: SPEC>`; the scope list is not shown.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Symbol { name, spec, .. } = self;
        write!(f, "<Name: {}, Spec: {}>", name, spec)
    }
}
/// Describes what a name in the symbol table refers to.
#[derive(Debug)]
pub enum SymbolSpec {
    /// A function; holds one type name per declared parameter.
    Func(Vec<TypeName>),
    /// A unit or tuple-style constructor of a declared type.
    DataConstructor {
        /// Position of the variant within its type declaration.
        index: usize,
        /// Name of the type the constructor belongs to.
        type_name: TypeName,
        /// Names of the constructor's argument types (empty for unit variants).
        type_args: Vec<Rc<String>>,
    },
    /// A record-style constructor of a declared type.
    RecordConstructor {
        /// Field name -> name of the field's type.
        members: HashMap<Rc<String>, TypeName>,
        /// Name of the type the constructor belongs to.
        type_name: TypeName,
    },
    /// A name introduced by a binding declaration.
    Binding,
}

impl fmt::Display for SymbolSpec {
    /// Writes a compact, human-readable description of the spec. Record members are
    /// abbreviated to the literal text `<members>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            SymbolSpec::Func(type_names) => write!(f, "Func({:?})", type_names),
            SymbolSpec::DataConstructor { index, type_name, type_args } => {
                write!(f, "DataConstructor(idx: {})({:?} -> {})", index, type_args, type_name)
            }
            SymbolSpec::RecordConstructor { type_name, .. } => {
                write!(f, "RecordConstructor(<members> -> {})", type_name)
            }
            SymbolSpec::Binding => f.write_str("Binding"),
        }
    }
}
impl SymbolTable {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
pub fn add_top_level_symbols(&mut self, ast: &ast::AST) -> Result<(), String> {
let mut scope_name_stack = Vec::new();
self.add_symbols_from_scope(&ast.0, &mut scope_name_stack)
}
fn add_symbols_from_scope<'a>(&'a mut self, statements: &Vec<Meta<Statement>>, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
use self::ast::Declaration::*;
fn insert_and_check_duplicate_symbol(table: &mut SymbolTrackTable, name: &Rc<String>) -> Result<(), String> {
match table.entry(name.clone()) {
Entry::Occupied(o) => {
let line_number = o.get(); //TODO make this actually work
Err(format!("Duplicate definition: {}. It's already defined at {}", name, line_number))
},
Entry::Vacant(v) => {
let line_number = 0; //TODO should work
v.insert(line_number);
Ok(())
}
}
}
let mut seen_identifiers: SymbolTrackTable = HashMap::new();
for meta in statements.iter() {
let statement = meta.node();
if let Statement::Declaration(decl) = statement {
match decl {
FuncSig(ref signature) => {
insert_and_check_duplicate_symbol(&mut seen_identifiers, &signature.name)?;
self.add_function_signature(signature, scope_name_stack)?
}
FuncDecl(ref signature, ref body) => {
insert_and_check_duplicate_symbol(&mut seen_identifiers, &signature.name)?;
self.add_function_signature(signature, scope_name_stack)?;
scope_name_stack.push(ScopeSegment{
scope_name: signature.name.clone(),
scope_type: ScopeSegmentKind::Function,
});
let output = self.add_symbols_from_scope(body, scope_name_stack);
let _ = scope_name_stack.pop();
output?
},
TypeDecl { name, body, mutable } => {
insert_and_check_duplicate_symbol(&mut seen_identifiers, &name.name)?;
self.add_type_decl(name, body, mutable, scope_name_stack)?
},
Binding { name, .. } => {
insert_and_check_duplicate_symbol(&mut seen_identifiers, name)?;
self.add_new_symbol(name, scope_name_stack, SymbolSpec::Binding);
}
_ => ()
}
}
}
Ok(())
}
pub fn debug_symbol_table(&self) -> String {
let mut output = format!("Symbol table\n");
for (name, sym) in &self.values {
write!(output, "{:?} -> {}\n", name, sym).unwrap();
}
output
}
fn add_function_signature(&mut self, signature: &Signature, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
let mut local_type_context = LocalTypeContext::new();
let types = signature.params.iter().map(|param| match param.anno {
Some(ref type_identifier) => Rc::new(format!("{:?}", type_identifier)),
None => local_type_context.new_universal_type()
}).collect();
self.add_new_symbol(&signature.name, scope_name_stack, SymbolSpec::Func(types));
Ok(())
}
//TODO handle type mutability
fn add_type_decl(&mut self, type_name: &TypeSingletonName, body: &TypeBody, _mutable: &bool, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
use crate::ast::{TypeIdentifier, Variant};
let TypeBody(variants) = body;
let ref type_name = type_name.name;
//scope_name_stack.push(name.clone()); //TODO adding this makes variants scoped under their
//type name and breaks a lot of things - don't add it until importing names works
//TODO figure out why _params isn't being used here
for (index, var) in variants.iter().enumerate() {
match var {
Variant::UnitStruct(variant_name) => {
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
type_args: vec![],
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
},
Variant::TupleStruct(variant_name, tuple_members) => {
//TODO fix the notion of a tuple type
let type_args = tuple_members.iter().map(|type_name| match type_name {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
}).collect();
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
type_args
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
},
Variant::Record { name, members: defined_members } => {
let mut members = HashMap::new();
let mut duplicate_member_definitions = Vec::new();
for (member_name, member_type) in defined_members {
match members.entry(member_name.clone()) {
Entry::Occupied(_) => duplicate_member_definitions.push(member_name.clone()),
Entry::Vacant(v) => {
v.insert(match member_type {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
});
}
}
}
if duplicate_member_definitions.len() != 0 {
return Err(format!("Duplicate member(s) in definition of type {}: {:?}", type_name, duplicate_member_definitions));
}
let spec = SymbolSpec::RecordConstructor { type_name: type_name.clone(), members };
self.add_new_symbol(name, scope_name_stack, spec);
},
}
}
//scope_name_stack.pop();
Ok(())
}
}
/// Hands out fresh placeholder type names (`a`, `b`, `c`, ...) for items that carry
/// no explicit type annotation.
struct LocalTypeContext {
    /// Number of names handed out so far.
    state: u32,
}

impl LocalTypeContext {
    /// Creates a context whose first generated name is `a`.
    fn new() -> LocalTypeContext {
        LocalTypeContext { state: 0 }
    }

    /// Returns the next unused placeholder name.
    ///
    /// The first 26 names are the single letters `a` through `z`. After that the
    /// letters repeat with a numeric suffix (`a1` ... `z1`, `a2`, ...), so every name
    /// stays alphanumeric and distinct. The previous implementation added the counter
    /// directly to `'a'`, which produced punctuation from the 27th name on and
    /// overflowed its `u8` arithmetic at the 160th.
    fn new_universal_type(&mut self) -> TypeName {
        const ALPHABET_LEN: u32 = 26;

        let n = self.state;
        self.state += 1;

        // `n % ALPHABET_LEN` is always below 26, so the narrowing cast cannot truncate.
        let letter = char::from(b'a' + (n % ALPHABET_LEN) as u8);
        let round = n / ALPHABET_LEN;
        let name = if round == 0 {
            letter.to_string()
        } else {
            format!("{}{}", letter, round)
        };
        Rc::new(name)
    }
}
#[cfg(test)]
mod symbol_table_tests {
    use super::*;
    use crate::util::quick_ast;

    /// Parses `source` and feeds it to a fresh table, returning the table together
    /// with the outcome of symbol collection.
    fn populate(source: &str) -> (SymbolTable, Result<(), String>) {
        let ast = quick_ast(source);
        let mut table = SymbolTable::new();
        let outcome = table.add_top_level_symbols(&ast);
        (table, outcome)
    }

    /// Returns the table built from `source`, panicking if collection fails.
    fn table_for(source: &str) -> SymbolTable {
        let (table, outcome) = populate(source);
        outcome.unwrap();
        table
    }

    /// Returns the error message produced for `source`, panicking if collection
    /// unexpectedly succeeds.
    fn error_for(source: &str) -> String {
        let (_table, outcome) = populate(source);
        outcome.unwrap_err()
    }

    /// Asserts that a top-level symbol exists after processing `$source`.
    macro_rules! values_in_table {
        //TODO multiple values
        ($source:expr, $single_value:expr) => {
            {
                let symbol_table = table_for($source);
                if symbol_table.lookup_by_name($single_value).is_none() {
                    panic!();
                }
            }
        }
    }

    #[test]
    fn basic_symbol_table() {
        values_in_table! { "let a = 10; fn b() { 20 }", &rc!(b) };
    }

    // Two top-level functions sharing a name are rejected.
    #[test]
    fn no_duplicates() {
        let message = error_for(r#"
fn a() { 1 }
fn b() { 2 }
fn a() { 3 }
"#);
        assert!(message.contains("Duplicate"))
    }

    // Two top-level bindings sharing a name are rejected.
    #[test]
    fn no_duplicates_2() {
        let message = error_for(r#"
let a = 20;
let q = 39;
let a = 30;
"#);
        assert!(message.contains("Duplicate"))
    }

    // A duplicate inside a function body is rejected as well.
    #[test]
    fn no_duplicates_3() {
        let message = error_for(r#"
fn a() {
let a = 20
let b = 40
a + b
}
fn q() {
let x = 30
let x = 33
}
"#);
        assert!(message.contains("Duplicate"))
    }

    // The same name in two different scopes is not a duplicate.
    #[test]
    fn dont_falsely_detect_duplicates() {
        let symbol_table = table_for(r#"
let a = 20;
fn some_func() {
let a = 40;
77
}
let q = 39;
"#);
        let top_level = Vec::new();
        let inside_some_func = vec![rc!(some_func)];
        assert!(symbol_table.lookup_by_path(&rc!(a), &top_level).is_some());
        assert!(symbol_table.lookup_by_path(&rc!(a), &inside_some_func).is_some());
    }

    #[test]
    fn enclosing_scopes() {
        let symbol_table = table_for(r#"
fn outer_func(x) {
fn inner_func(arg) {
arg
}
x + inner_func(x)
}"#);
        let top_level = Vec::new();
        let inside_outer = vec![rc!(outer_func)];
        assert!(symbol_table.lookup_by_path(&rc!(outer_func), &top_level).is_some());
        assert!(symbol_table.lookup_by_path(&rc!(inner_func), &inside_outer).is_some());
    }

    #[test]
    fn enclosing_scopes_2() {
        let symbol_table = table_for(r#"
fn outer_func(x) {
fn inner_func(arg) {
arg
}
fn second_inner_func() {
fn another_inner_func() {
}
}
inner_func(x)
}"#);
        println!("{}", symbol_table.debug_symbol_table());
        let top_level = Vec::new();
        let inside_outer = vec![rc!(outer_func)];
        let inside_second_inner = vec![rc!(outer_func), rc!(second_inner_func)];
        assert!(symbol_table.lookup_by_path(&rc!(outer_func), &top_level).is_some());
        assert!(symbol_table.lookup_by_path(&rc!(inner_func), &inside_outer).is_some());
        assert!(symbol_table.lookup_by_path(&rc!(second_inner_func), &inside_outer).is_some());
        assert!(symbol_table.lookup_by_path(&rc!(another_inner_func), &inside_second_inner).is_some());
    }

    // A duplicate two function levels deep is still reported.
    #[test]
    fn enclosing_scopes_3() {
        let message = error_for(r#"
fn outer_func(x) {
fn inner_func(arg) {
arg
}
fn second_inner_func() {
fn another_inner_func() {
}
fn another_inner_func() {
}
}
inner_func(x)
}"#);
        assert!(message.contains("Duplicate"))
    }
}