From 45c72f97a2ab664c150e1217934e3b794acf877f Mon Sep 17 00:00:00 2001 From: Greg Shuflin Date: Tue, 2 Nov 2021 20:49:38 -0700 Subject: [PATCH] Break up symbol table code into smaller modules --- schala-lang/language/src/symbol_table/mod.rs | 331 +----------------- .../language/src/symbol_table/populator.rs | 307 ++++++++++++++++ 2 files changed, 324 insertions(+), 314 deletions(-) create mode 100644 schala-lang/language/src/symbol_table/populator.rs diff --git a/schala-lang/language/src/symbol_table/mod.rs b/schala-lang/language/src/symbol_table/mod.rs index 7d7aaf5..82cd38d 100644 --- a/schala-lang/language/src/symbol_table/mod.rs +++ b/schala-lang/language/src/symbol_table/mod.rs @@ -1,23 +1,21 @@ #![allow(clippy::enum_variant_names)] use std::{ - collections::{hash_map::Entry, HashMap, HashSet}, + collections::{hash_map::Entry, HashMap}, fmt, rc::Rc, - str::FromStr, }; use crate::{ ast, - ast::{ - Declaration, Expression, ExpressionKind, ItemId, Statement, StatementKind, TypeBody, - TypeSingletonName, Variant, VariantKind, - }, + ast::ItemId, builtin::Builtin, tokenizing::Location, - type_inference::{self, PendingType, TypeBuilder, TypeContext, TypeId, VariantBuilder}, + type_inference::{TypeContext, TypeId}, }; +mod populator; +use populator::SymbolTablePopulator; mod fqsn; pub use fqsn::{Fqsn, ScopeSegment}; mod resolver; @@ -94,18 +92,16 @@ pub struct SymbolTable { } impl SymbolTable { - pub fn new() -> SymbolTable { - let table = SymbolTable { + /// Create a new, empty SymbolTable + pub fn new() -> Self { + Self { def_id_store: IdStore::new(), symbol_trie: SymbolTrie::new(), fq_names: NameTable::new(), types: NameTable::new(), - id_to_def: HashMap::new(), def_to_symbol: HashMap::new(), - }; - - table + } } /// The main entry point into the symbol table. 
This will traverse the AST in several @@ -116,13 +112,18 @@ impl SymbolTable { ast: &ast::AST, type_context: &mut TypeContext, ) -> Result<(), Vec> { - let mut runner = SymbolTableRunner { type_context, table: self }; + let mut populator = SymbolTablePopulator { type_context, table: self }; - let errs = runner.populate_name_tables(ast); + let errs = populator.populate_name_tables(ast); if !errs.is_empty() { return Err(errs); } - runner.resolve_scopes(ast); + + // Walks the AST, matching the ID of an identifier used in some expression to + // the corresponding Symbol. + let mut resolver = resolver::ScopeResolver::new(self); + resolver.resolve(ast); + Ok(()) } @@ -168,11 +169,6 @@ impl SymbolTable { } } -struct SymbolTableRunner<'a> { - type_context: &'a mut TypeContext, - table: &'a mut SymbolTable, -} - #[allow(dead_code)] #[derive(Debug, Clone)] pub struct Symbol { @@ -239,296 +235,3 @@ impl fmt::Display for SymbolSpec { } } } - -impl<'a> SymbolTableRunner<'a> { - /* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem - * later */ - - fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) { - self.table.add_symbol(id, fqsn, spec) - } - - /// Walks the AST, matching the ID of an identifier used in some expression to - /// the corresponding Symbol. - fn resolve_scopes(&mut self, ast: &ast::AST) { - let mut resolver = resolver::ScopeResolver::new(self.table); - resolver.resolve(ast); - } - - /// This function traverses the AST and adds symbol table entries for - /// constants, functions, types, and modules defined within. 
This simultaneously - /// checks for dupicate definitions (and returns errors if discovered), and sets - /// up name tables that will be used by further parts of the compiler - fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec { - let mut scope_stack = vec![]; - self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false) - } - - fn add_from_scope( - &mut self, - statements: &[Statement], - scope_stack: &mut Vec, - function_scope: bool, - ) -> Vec { - let mut errors = vec![]; - - for statement in statements { - let Statement { id, kind, location } = statement; - let location = *location; - if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) { - errors.push(err); - } else { - // If there's an error with a name, don't recurse into subscopes of that name - let recursive_errs = match kind { - StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => { - let new_scope = ScopeSegment::Name(signature.name.clone()); - scope_stack.push(new_scope); - let output = self.add_from_scope(body.as_ref(), scope_stack, true); - scope_stack.pop(); - output - } - StatementKind::Declaration(Declaration::Module { name, items }) => { - let new_scope = ScopeSegment::Name(name.clone()); - scope_stack.push(new_scope); - let output = self.add_from_scope(items.as_ref(), scope_stack, false); - scope_stack.pop(); - output - } - StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) => - self.add_type_members(name, body, mutable, location, scope_stack), - _ => vec![], - }; - errors.extend(recursive_errs.into_iter()); - } - } - - errors - } - - fn add_single_statement( - &mut self, - id: &ItemId, - kind: &StatementKind, - location: Location, - scope_stack: &[ScopeSegment], - function_scope: bool, - ) -> Result<(), SymbolError> { - match kind { - StatementKind::Declaration(Declaration::FuncSig(signature)) => { - let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone()); - self.table - 
.fq_names - .register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?; - self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?; - - self.add_symbol(id, fq_function, SymbolSpec::Func); - } - StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => { - let fn_name = &signature.name; - let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone()); - self.table - .fq_names - .register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?; - self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?; - - self.add_symbol(id, fq_function, SymbolSpec::Func); - } - StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => { - let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone()); - self.table.types.register(fq_type, NameSpec { location, kind: TypeKind })?; - } - StatementKind::Declaration(Declaration::Binding { name, .. }) => { - let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone()); - self.table - .fq_names - .register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?; - if !function_scope { - self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding); - } - } - StatementKind::Declaration(Declaration::Module { name, .. 
}) => { - let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone()); - self.table.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?; - } - StatementKind::Declaration(Declaration::Annotation { name, arguments, inner }) => { - let inner = inner.as_ref(); - self.add_single_statement( - &inner.id, - &inner.kind, - inner.location, - scope_stack, - function_scope, - )?; - self.process_annotation(name.as_ref(), arguments.as_slice(), scope_stack, inner)?; - } - _ => (), - } - Ok(()) - } - - fn process_annotation( - &mut self, - name: &str, - arguments: &[Expression], - scope_stack: &[ScopeSegment], - inner: &Statement, - ) -> Result<(), SymbolError> { - println!("handling annotation: {}", name); - if name == "register_builtin" { - if let Statement { - id: _, - location: _, - kind: StatementKind::Declaration(Declaration::FuncDecl(sig, _)), - } = inner - { - let fqsn = Fqsn::from_scope_stack(scope_stack, sig.name.clone()); - let builtin_name = match arguments { - [Expression { kind: ExpressionKind::Value(qname), .. 
}] - if qname.components.len() == 1 => - qname.components[0].clone(), - _ => - return Err(SymbolError::BadAnnotation { - name: name.to_string(), - msg: "Bad argument for register_builtin".to_string(), - }), - }; - - let builtin = - Builtin::from_str(builtin_name.as_str()).map_err(|_| SymbolError::BadAnnotation { - name: name.to_string(), - msg: format!("Invalid builtin: {}", builtin_name), - })?; - - self.table.populate_single_builtin(fqsn, builtin); - Ok(()) - } else { - Err(SymbolError::BadAnnotation { - name: name.to_string(), - msg: "register_builtin not annotating a function".to_string(), - }) - } - } else { - Err(SymbolError::UnknownAnnotation { name: name.to_string() }) - } - } - - fn add_type_members( - &mut self, - type_name: &TypeSingletonName, - type_body: &TypeBody, - _mutable: &bool, - location: Location, - scope_stack: &mut Vec, - ) -> Vec { - let (variants, immediate_variant) = match type_body { - TypeBody::Variants(variants) => (variants.clone(), false), - TypeBody::ImmediateRecord(id, fields) => ( - vec![Variant { - id: *id, - name: type_name.name.clone(), - kind: VariantKind::Record(fields.clone()), - }], - true, - ), - }; - let type_fqsn = Fqsn::from_scope_stack(scope_stack, type_name.name.clone()); - - let new_scope = ScopeSegment::Name(type_name.name.clone()); - scope_stack.push(new_scope); - - // Check for duplicates before registering any types with the TypeContext - let mut seen_variants = HashSet::new(); - let mut errors = vec![]; - - for variant in variants.iter() { - if seen_variants.contains(&variant.name) { - errors.push(SymbolError::DuplicateVariant { - type_fqsn: type_fqsn.clone(), - name: variant.name.as_ref().to_string(), - }) - } - seen_variants.insert(variant.name.clone()); - - if let VariantKind::Record(ref members) = variant.kind { - let variant_name = Fqsn::from_scope_stack(scope_stack.as_ref(), variant.name.clone()); - let mut seen_members = HashMap::new(); - for (member_name, _) in members.iter() { - match 
seen_members.entry(member_name.as_ref()) { - Entry::Occupied(o) => { - let location = *o.get(); - errors.push(SymbolError::DuplicateRecord { - type_name: variant_name.clone(), - location, - member: member_name.as_ref().to_string(), - }); - } - //TODO eventually this should track meaningful locations - Entry::Vacant(v) => { - v.insert(location); - } - } - } - } - } - - if !errors.is_empty() { - return errors; - } - - let mut type_builder = TypeBuilder::new(type_name.name.as_ref()); - - let mut fqsn_id_map = HashMap::new(); - for variant in variants.iter() { - let Variant { name, kind, id } = variant; - - fqsn_id_map.insert(Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()), id); - - let mut variant_builder = VariantBuilder::new(name.as_ref()); - match kind { - VariantKind::UnitStruct => (), - VariantKind::TupleStruct(items) => - for type_identifier in items { - let pending: PendingType = type_identifier.into(); - variant_builder.add_member(pending); - }, - VariantKind::Record(members) => - for (field_name, type_identifier) in members.iter() { - let pending: PendingType = type_identifier.into(); - variant_builder.add_record_member(field_name.as_ref(), pending); - }, - } - type_builder.add_variant(variant_builder); - } - - let type_id = self.type_context.register_type(type_builder); - let type_definition = self.type_context.lookup_type(&type_id).unwrap(); - - // This index is guaranteed to be the correct tag - for (index, variant) in type_definition.variants.iter().enumerate() { - let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string())); - let id = fqsn_id_map.get(&fqsn).unwrap(); - let tag = index as u32; - let spec = match &variant.members { - type_inference::VariantMembers::Unit => SymbolSpec::DataConstructor { tag, type_id }, - type_inference::VariantMembers::Tuple(..) => SymbolSpec::DataConstructor { tag, type_id }, - type_inference::VariantMembers::Record(..) 
=> SymbolSpec::RecordConstructor { tag, type_id }, - }; - self.table.add_symbol(id, fqsn, spec); - } - - if immediate_variant { - let variant = &type_definition.variants[0]; - let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string())); - let id = fqsn_id_map.get(&fqsn).unwrap(); - let abbrev_fqsn = Fqsn::from_scope_stack( - scope_stack[0..scope_stack.len() - 1].as_ref(), - Rc::new(variant.name.to_string()), - ); - let spec = SymbolSpec::RecordConstructor { tag: 0, type_id }; - self.table.add_symbol(id, abbrev_fqsn, spec); - } - - scope_stack.pop(); - vec![] - } -} diff --git a/schala-lang/language/src/symbol_table/populator.rs b/schala-lang/language/src/symbol_table/populator.rs new file mode 100644 index 0000000..1ad2f64 --- /dev/null +++ b/schala-lang/language/src/symbol_table/populator.rs @@ -0,0 +1,307 @@ +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + rc::Rc, + str::FromStr, +}; + +use super::{Fqsn, NameKind, NameSpec, ScopeSegment, SymbolError, SymbolSpec, SymbolTable, TypeKind}; +use crate::{ + ast::{ + Declaration, Expression, ExpressionKind, ItemId, Statement, StatementKind, TypeBody, + TypeSingletonName, Variant, VariantKind, AST, + }, + builtin::Builtin, + tokenizing::Location, + type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder}, +}; + +pub(super) struct SymbolTablePopulator<'a> { + pub(super) type_context: &'a mut TypeContext, + pub(super) table: &'a mut SymbolTable, +} + +impl<'a> SymbolTablePopulator<'a> { + /* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem + * later */ + + fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) { + self.table.add_symbol(id, fqsn, spec) + } + + /// This function traverses the AST and adds symbol table entries for + /// constants, functions, types, and modules defined within. 
This simultaneously + /// checks for duplicate definitions (and returns errors if discovered), and sets + /// up name tables that will be used by further parts of the compiler + pub fn populate_name_tables(&mut self, ast: &AST) -> Vec { + let mut scope_stack = vec![]; + self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false) + } + + fn add_from_scope( + &mut self, + statements: &[Statement], + scope_stack: &mut Vec, + function_scope: bool, + ) -> Vec { + let mut errors = vec![]; + + for statement in statements { + let Statement { id, kind, location } = statement; + let location = *location; + if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) { + errors.push(err); + } else { + // If there's an error with a name, don't recurse into subscopes of that name + let recursive_errs = match kind { + StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => { + let new_scope = ScopeSegment::Name(signature.name.clone()); + scope_stack.push(new_scope); + let output = self.add_from_scope(body.as_ref(), scope_stack, true); + scope_stack.pop(); + output + } + StatementKind::Declaration(Declaration::Module { name, items }) => { + let new_scope = ScopeSegment::Name(name.clone()); + scope_stack.push(new_scope); + let output = self.add_from_scope(items.as_ref(), scope_stack, false); + scope_stack.pop(); + output + } + StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) => + self.add_type_members(name, body, mutable, location, scope_stack), + _ => vec![], + }; + errors.extend(recursive_errs.into_iter()); + } + } + + errors + } + + fn add_single_statement( + &mut self, + id: &ItemId, + kind: &StatementKind, + location: Location, + scope_stack: &[ScopeSegment], + function_scope: bool, + ) -> Result<(), SymbolError> { + match kind { + StatementKind::Declaration(Declaration::FuncSig(signature)) => { + let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone()); + self.table +
.fq_names + .register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?; + self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?; + + self.add_symbol(id, fq_function, SymbolSpec::Func); + } + StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => { + let fn_name = &signature.name; + let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone()); + self.table + .fq_names + .register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?; + self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?; + + self.add_symbol(id, fq_function, SymbolSpec::Func); + } + StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => { + let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone()); + self.table.types.register(fq_type, NameSpec { location, kind: TypeKind })?; + } + StatementKind::Declaration(Declaration::Binding { name, .. }) => { + let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone()); + self.table + .fq_names + .register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?; + if !function_scope { + self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding); + } + } + StatementKind::Declaration(Declaration::Module { name, .. 
}) => { + let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone()); + self.table.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?; + } + StatementKind::Declaration(Declaration::Annotation { name, arguments, inner }) => { + let inner = inner.as_ref(); + self.add_single_statement( + &inner.id, + &inner.kind, + inner.location, + scope_stack, + function_scope, + )?; + self.process_annotation(name.as_ref(), arguments.as_slice(), scope_stack, inner)?; + } + _ => (), + } + Ok(()) + } + + fn process_annotation( + &mut self, + name: &str, + arguments: &[Expression], + scope_stack: &[ScopeSegment], + inner: &Statement, + ) -> Result<(), SymbolError> { + println!("handling annotation: {}", name); + if name == "register_builtin" { + if let Statement { + id: _, + location: _, + kind: StatementKind::Declaration(Declaration::FuncDecl(sig, _)), + } = inner + { + let fqsn = Fqsn::from_scope_stack(scope_stack, sig.name.clone()); + let builtin_name = match arguments { + [Expression { kind: ExpressionKind::Value(qname), .. 
}] + if qname.components.len() == 1 => + qname.components[0].clone(), + _ => + return Err(SymbolError::BadAnnotation { + name: name.to_string(), + msg: "Bad argument for register_builtin".to_string(), + }), + }; + + let builtin = + Builtin::from_str(builtin_name.as_str()).map_err(|_| SymbolError::BadAnnotation { + name: name.to_string(), + msg: format!("Invalid builtin: {}", builtin_name), + })?; + + self.table.populate_single_builtin(fqsn, builtin); + Ok(()) + } else { + Err(SymbolError::BadAnnotation { + name: name.to_string(), + msg: "register_builtin not annotating a function".to_string(), + }) + } + } else { + Err(SymbolError::UnknownAnnotation { name: name.to_string() }) + } + } + + fn add_type_members( + &mut self, + type_name: &TypeSingletonName, + type_body: &TypeBody, + _mutable: &bool, + location: Location, + scope_stack: &mut Vec, + ) -> Vec { + let (variants, immediate_variant) = match type_body { + TypeBody::Variants(variants) => (variants.clone(), false), + TypeBody::ImmediateRecord(id, fields) => ( + vec![Variant { + id: *id, + name: type_name.name.clone(), + kind: VariantKind::Record(fields.clone()), + }], + true, + ), + }; + let type_fqsn = Fqsn::from_scope_stack(scope_stack, type_name.name.clone()); + + let new_scope = ScopeSegment::Name(type_name.name.clone()); + scope_stack.push(new_scope); + + // Check for duplicates before registering any types with the TypeContext + let mut seen_variants = HashSet::new(); + let mut errors = vec![]; + + for variant in variants.iter() { + if seen_variants.contains(&variant.name) { + errors.push(SymbolError::DuplicateVariant { + type_fqsn: type_fqsn.clone(), + name: variant.name.as_ref().to_string(), + }) + } + seen_variants.insert(variant.name.clone()); + + if let VariantKind::Record(ref members) = variant.kind { + let variant_name = Fqsn::from_scope_stack(scope_stack.as_ref(), variant.name.clone()); + let mut seen_members = HashMap::new(); + for (member_name, _) in members.iter() { + match 
seen_members.entry(member_name.as_ref()) { + Entry::Occupied(o) => { + let location = *o.get(); + errors.push(SymbolError::DuplicateRecord { + type_name: variant_name.clone(), + location, + member: member_name.as_ref().to_string(), + }); + } + //TODO eventually this should track meaningful locations + Entry::Vacant(v) => { + v.insert(location); + } + } + } + } + } + + if !errors.is_empty() { + return errors; + } + + let mut type_builder = TypeBuilder::new(type_name.name.as_ref()); + + let mut fqsn_id_map = HashMap::new(); + for variant in variants.iter() { + let Variant { name, kind, id } = variant; + + fqsn_id_map.insert(Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()), id); + + let mut variant_builder = VariantBuilder::new(name.as_ref()); + match kind { + VariantKind::UnitStruct => (), + VariantKind::TupleStruct(items) => + for type_identifier in items { + let pending: PendingType = type_identifier.into(); + variant_builder.add_member(pending); + }, + VariantKind::Record(members) => + for (field_name, type_identifier) in members.iter() { + let pending: PendingType = type_identifier.into(); + variant_builder.add_record_member(field_name.as_ref(), pending); + }, + } + type_builder.add_variant(variant_builder); + } + + let type_id = self.type_context.register_type(type_builder); + let type_definition = self.type_context.lookup_type(&type_id).unwrap(); + + // This index is guaranteed to be the correct tag + for (index, variant) in type_definition.variants.iter().enumerate() { + let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string())); + let id = fqsn_id_map.get(&fqsn).unwrap(); + let tag = index as u32; + let spec = match &variant.members { + type_inference::VariantMembers::Unit => SymbolSpec::DataConstructor { tag, type_id }, + type_inference::VariantMembers::Tuple(..) => SymbolSpec::DataConstructor { tag, type_id }, + type_inference::VariantMembers::Record(..) 
=> SymbolSpec::RecordConstructor { tag, type_id }, + }; + self.table.add_symbol(id, fqsn, spec); + } + + if immediate_variant { + let variant = &type_definition.variants[0]; + let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string())); + let id = fqsn_id_map.get(&fqsn).unwrap(); + let abbrev_fqsn = Fqsn::from_scope_stack( + scope_stack[0..scope_stack.len() - 1].as_ref(), + Rc::new(variant.name.to_string()), + ); + let spec = SymbolSpec::RecordConstructor { tag: 0, type_id }; + self.table.add_symbol(id, abbrev_fqsn, spec); + } + + scope_stack.pop(); + vec![] + } +}