From 3ed5f1d16c826a783a6829176156ef28508ef8a9 Mon Sep 17 00:00:00 2001 From: Greg Shuflin Date: Thu, 14 Oct 2021 06:18:17 -0700 Subject: [PATCH] Refactor SourceMap Move it into the SymbolTable entirely, make the parser not care about it. --- schala-lang/language/src/ast.rs | 5 +++- schala-lang/language/src/ast/visitor_test.rs | 2 +- schala-lang/language/src/error.rs | 5 ++-- schala-lang/language/src/eval/test.rs | 5 ++-- schala-lang/language/src/parsing.rs | 15 ++++------ schala-lang/language/src/parsing/test.rs | 28 +++++++++++-------- schala-lang/language/src/schala.rs | 10 ++----- schala-lang/language/src/source_map.rs | 20 +++---------- schala-lang/language/src/symbol_table.rs | 27 ++++++++++-------- schala-lang/language/src/symbol_table/test.rs | 8 ++---- schala-lang/language/src/tokenizing.rs | 19 +++++++++++-- schala-lang/language/src/typechecking.rs | 2 +- schala-lang/language/src/util.rs | 11 ++------ 13 files changed, 76 insertions(+), 81 deletions(-) diff --git a/schala-lang/language/src/ast.rs b/schala-lang/language/src/ast.rs index 29868a2..4a06e67 100644 --- a/schala-lang/language/src/ast.rs +++ b/schala-lang/language/src/ast.rs @@ -9,9 +9,10 @@ mod operators; pub use operators::*; pub use visitor::ASTVisitor; pub use walker::walk_ast; +use crate::tokenizing::Location; /// An abstract identifier for an AST node -#[derive(Debug, PartialEq, Eq, Hash, Clone)] +#[derive(Debug, PartialEq, Eq, Hash, Clone, Default)] pub struct ItemId { idx: u32, } @@ -57,6 +58,8 @@ pub struct AST { pub struct Statement { #[derivative(PartialEq="ignore")] pub id: ItemId, + #[derivative(PartialEq="ignore")] + pub location: Location, pub kind: StatementKind, } diff --git a/schala-lang/language/src/ast/visitor_test.rs b/schala-lang/language/src/ast/visitor_test.rs index 7c9246c..7c92a7a 100644 --- a/schala-lang/language/src/ast/visitor_test.rs +++ b/schala-lang/language/src/ast/visitor_test.rs @@ -22,7 +22,7 @@ impl ASTVisitor for Tester { #[test] fn foo() { let mut tester = Tester { count: 0, float_count: 0 }; - let (ast, _) = quick_ast(r#" + let ast = quick_ast(r#" import gragh let a = 20 + 84 diff --git a/schala-lang/language/src/error.rs b/schala-lang/language/src/error.rs index 3c6a409..fb80829 100644 --- a/schala-lang/language/src/error.rs +++ b/schala-lang/language/src/error.rs @@ -1,7 +1,6 @@ use crate::parsing::ParseError; use crate::schala::{SourceReference, Stage}; -use crate::source_map::Location; -use crate::tokenizing::{Token, TokenKind}; +use crate::tokenizing::{Token, TokenKind, Location}; use crate::typechecking::TypeError; pub struct SchalaError { @@ -85,7 +84,7 @@ struct Error { fn format_parse_error(error: ParseError, source_reference: &SourceReference) -> String { let line_num = error.token.location.line_num; let ch = error.token.location.char_num; - let line_from_program = source_reference.get_line(line_num); + let line_from_program = source_reference.get_line(line_num as usize); let location_pointer = format!("{}^", " ".repeat(ch)); let line_num_digits = format!("{}", line_num).chars().count(); diff --git a/schala-lang/language/src/eval/test.rs b/schala-lang/language/src/eval/test.rs index 5c534cc..60837fd 100644 --- a/schala-lang/language/src/eval/test.rs +++ b/schala-lang/language/src/eval/test.rs @@ -9,9 +9,8 @@ use crate::reduced_ast::reduce; use crate::eval::State; fn evaluate_all_outputs(input: &str) -> Vec> { - let (mut ast, source_map) = crate::util::quick_ast(input); - let source_map = Rc::new(RefCell::new(source_map)); - let symbol_table = Rc::new(RefCell::new(SymbolTable::new(source_map))); + let mut ast = crate::util::quick_ast(input); + let symbol_table = Rc::new(RefCell::new(SymbolTable::new())); symbol_table.borrow_mut().add_top_level_symbols(&ast).unwrap(); { let mut scope_resolver = ScopeResolver::new(symbol_table.clone()); diff --git a/schala-lang/language/src/parsing.rs b/schala-lang/language/src/parsing.rs index 525d01d..8c675ad 100644 --- a/schala-lang/language/src/parsing.rs +++ b/schala-lang/language/src/parsing.rs @@ -166,10 +166,9 @@ use std::str::FromStr; use crate::tokenizing::*; use crate::tokenizing::Kw::*; use crate::tokenizing::TokenKind::*; +use crate::tokenizing::Location; -use crate::source_map::Location; use crate::ast::*; -use crate::schala::SourceMapHandle; /// Represents a parsing error #[derive(Debug)] @@ -202,7 +201,6 @@ pub struct Parser { parse_level: u32, restrictions: ParserRestrictions, id_store: ItemIdStore, - source_map: SourceMapHandle } @@ -247,14 +245,13 @@ impl TokenHandler { impl Parser { /// Create a new parser initialized with some tokens. - pub fn new(source_map: SourceMapHandle) -> Parser { + pub fn new() -> Parser { Parser { token_handler: TokenHandler::new(vec![]), parse_record: vec![], parse_level: 0, restrictions: ParserRestrictions { no_struct_literal: false }, id_store: ItemIdStore::new(), - source_map, } } @@ -383,8 +380,7 @@ impl Parser { _ => self.expression().map(|expr| { StatementKind::Expression(expr) } ), }?; let id = self.id_store.fresh(); - self.source_map.borrow_mut().add_location(&id, tok.location); - Ok(Statement { kind, id }) + Ok(Statement { kind, id, location: tok.location }) } #[recursive_descent_method] @@ -1040,11 +1036,12 @@ impl Parser { #[recursive_descent_method] fn expr_or_block(&mut self) -> ParseResult { - match self.token_handler.peek_kind() { + let tok = self.token_handler.peek(); + match tok.get_kind() { LCurlyBrace => self.block(), _ => { let expr = self.expression()?; - let s = Statement { id: self.id_store.fresh(), kind: StatementKind::Expression(expr) }; + let s = Statement { id: self.id_store.fresh(), location: tok.location, kind: StatementKind::Expression(expr) }; Ok(vec![s]) } } diff --git a/schala-lang/language/src/parsing/test.rs b/schala-lang/language/src/parsing/test.rs index 50c4c09..3d9df00 100644 --- a/schala-lang/language/src/parsing/test.rs +++ b/schala-lang/language/src/parsing/test.rs @@ -1,8 +1,8 @@ #![cfg(test)] -use std::cell::RefCell; use std::rc::Rc; use std::str::FromStr; +use crate::tokenizing::Location; use super::{Parser, ParseResult, tokenize}; use crate::ast::*; use super::Declaration::*; @@ -14,10 +14,8 @@ use super::Variant::*; use super::ForBody::*; fn make_parser(input: &str) -> Parser { - let source_map = crate::source_map::SourceMap::new(); - let source_map_handle = Rc::new(RefCell::new(source_map)); let tokens: Vec = tokenize(input); - let mut parser = super::Parser::new(source_map_handle); + let mut parser = super::Parser::new(); parser.add_new_tokens(tokens); parser } @@ -27,6 +25,15 @@ fn parse(input: &str) -> ParseResult { parser.parse() } +//TODO maybe can be const? +fn make_statement(kind: StatementKind) -> Statement { + Statement { + location: Location::default(), + id: ItemId::default(), + kind, + } +} + macro_rules! parse_test { ($string:expr, $correct:expr) => { assert_eq!(parse($string).unwrap(), $correct) @@ -61,19 +68,19 @@ macro_rules! tys { macro_rules! decl { ($expr_type:expr) => { - Statement { id: ItemIdStore::new_id(), kind: StatementKind::Declaration($expr_type) } + make_statement(StatementKind::Declaration($expr_type)) }; } macro_rules! import { ($import_spec:expr) => { - Statement { id: ItemIdStore::new_id(), kind: StatementKind::Import($import_spec) } + make_statement(StatementKind::Import($import_spec)) } } macro_rules! module { ($module_spec:expr) => { - Statement { id: ItemIdStore::new_id(), kind: StatementKind::Module($module_spec) } + make_statement(StatementKind::Module($module_spec)) } } @@ -99,10 +106,9 @@ macro_rules! prefexp { ($op:expr, $lhs:expr) => { PrefixExp(PrefixOp::from_str($op).unwrap(), bx!(Expression::new(ItemIdStore::new_id(), $lhs).into())) } } macro_rules! exst { - ($expr_type:expr) => { Statement { id: ItemIdStore::new_id(), kind: StatementKind::Expression(Expression::new(ItemIdStore::new_id(), $expr_type).into())} }; - ($expr_type:expr, $type_anno:expr) => { Statement { id: ItemIdStore::new_id(), kind: StatementKind::Expression(Expression::with_anno(ItemIdStore::new_id(), $expr_type, $type_anno).into())} }; - ($op:expr, $lhs:expr, $rhs:expr) => { Statement { id: ItemIdStore::new_id(), ,kind: StatementKind::Expression(ex!(binexp!($op, $lhs, $rhs)))} - }; + ($expr_type:expr) => { make_statement(StatementKind::Expression(Expression::new(ItemIdStore::new_id(), $expr_type).into())) }; + ($expr_type:expr, $type_anno:expr) => { make_statement(StatementKind::Expression(Expression::with_anno(ItemIdStore::new_id(), $expr_type, $type_anno).into())) }; + ($op:expr, $lhs:expr, $rhs:expr) => { make_statement(StatementKind::Expression(ex!(binexp!($op, $lhs, $rhs)))) }; (s $statement_text:expr) => { { let mut parser = make_parser($statement_text); diff --git a/schala-lang/language/src/schala.rs b/schala-lang/language/src/schala.rs index 07f7b73..769925b 100644 --- a/schala-lang/language/src/schala.rs +++ b/schala-lang/language/src/schala.rs @@ -6,11 +6,10 @@ use std::rc::Rc; use schala_repl::{ProgrammingLanguageInterface, ComputationRequest, ComputationResponse, LangMetaRequest, LangMetaResponse, GlobalOutputStats}; -use crate::{reduced_ast, tokenizing, parsing, eval, typechecking, symbol_table, source_map}; +use crate::{reduced_ast, tokenizing, parsing, eval, typechecking, symbol_table}; use crate::error::SchalaError; pub type SymbolTableHandle = Rc>; -pub type SourceMapHandle = Rc>; /// All the state necessary to parse and execute a Schala program are stored in this struct. /// `state` represents the execution state for the AST-walking interpreter, the other fields @@ -18,7 +17,6 @@ pub type SourceMapHandle = Rc>; #[allow(dead_code)] pub struct Schala { source_reference: SourceReference, - source_map: SourceMapHandle, state: eval::State<'static>, symbol_table: SymbolTableHandle, resolver: crate::scope_resolution::ScopeResolver<'static>, @@ -40,17 +38,15 @@ impl Schala { impl Schala { /// Creates a new Schala environment *without* any prelude. fn new_blank_env() -> Schala { - let source_map = Rc::new(RefCell::new(source_map::SourceMap::new())); - let symbols = Rc::new(RefCell::new(symbol_table::SymbolTable::new(source_map.clone()))); + let symbols = Rc::new(RefCell::new(symbol_table::SymbolTable::new())); Schala { //TODO maybe these can be the same structure source_reference: SourceReference::new(), symbol_table: symbols.clone(), - source_map: source_map.clone(), resolver: crate::scope_resolution::ScopeResolver::new(symbols.clone()), state: eval::State::new(), type_context: typechecking::TypeContext::new(), - active_parser: parsing::Parser::new(source_map) + active_parser: parsing::Parser::new() } } diff --git a/schala-lang/language/src/source_map.rs b/schala-lang/language/src/source_map.rs index cfa2ce7..7244d8b 100644 --- a/schala-lang/language/src/source_map.rs +++ b/schala-lang/language/src/source_map.rs @@ -1,22 +1,10 @@ use std::collections::HashMap; -use std::fmt; use crate::ast::ItemId; +use crate::tokenizing::Location; -pub type LineNumber = usize; - -#[derive(Debug, Clone, Copy, PartialEq)] -pub struct Location { - pub line_num: LineNumber, - pub char_num: usize, -} - -impl fmt::Display for Location { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}:{}", self.line_num, self.char_num) - } -} +//TODO rename this type to make its purpose clearer pub struct SourceMap { map: HashMap } @@ -26,11 +14,11 @@ impl SourceMap { SourceMap { map: HashMap::new() } } - pub fn add_location(&mut self, id: &ItemId, loc: Location) { + pub(crate) fn add_location(&mut self, id: &ItemId, loc: Location) { self.map.insert(id.clone(), loc); } - pub fn lookup(&self, id: &ItemId) -> Option { + pub(crate) fn lookup(&self, id: &ItemId) -> Option { match self.map.get(id) { Some(loc) => Some(loc.clone()), None => None diff --git a/schala-lang/language/src/symbol_table.rs b/schala-lang/language/src/symbol_table.rs index 64662e8..2ba2023 100644 --- a/schala-lang/language/src/symbol_table.rs +++ b/schala-lang/language/src/symbol_table.rs @@ -4,8 +4,8 @@ use std::rc::Rc; use std::fmt; use std::fmt::Write; -use crate::schala::SourceMapHandle; -use crate::source_map::{SourceMap, LineNumber}; +use crate::tokenizing::LineNumber; +use crate::source_map::SourceMap; use crate::ast; use crate::ast::{ItemId, TypeBody, TypeSingletonName, Signature, Statement, StatementKind, ModuleSpecifier}; use crate::typechecking::TypeName; @@ -93,16 +93,16 @@ impl ScopeSegment { //cf. p. 150 or so of Language Implementation Patterns pub struct SymbolTable { - source_map_handle: SourceMapHandle, + source_map: SourceMap, symbol_path_to_symbol: HashMap, id_to_fqsn: HashMap, symbol_trie: SymbolTrie, } impl SymbolTable { - pub fn new(source_map_handle: SourceMapHandle) -> SymbolTable { + pub fn new() -> SymbolTable { SymbolTable { - source_map_handle, + source_map: SourceMap::new(), symbol_path_to_symbol: HashMap::new(), id_to_fqsn: HashMap::new(), symbol_trie: SymbolTrie::new() @@ -198,15 +198,17 @@ impl SymbolTable { for statement in statements.iter() { match statement { - Statement { kind: StatementKind::Declaration(decl), id } => { + Statement { kind: StatementKind::Declaration(decl), id, location, } => { + self.source_map.add_location(id, *location); + match decl { FuncSig(ref signature) => { - seen_identifiers.try_register(&signature.name, &id, &self.source_map_handle.borrow()) + seen_identifiers.try_register(&signature.name, &id, &self.source_map) .map_err(|line| format!("Duplicate function definition: {}. It's already defined at {}", signature.name, line))?; self.add_function_signature(signature, scope_name_stack)? } FuncDecl(ref signature, ref body) => { - seen_identifiers.try_register(&signature.name, &id, &self.source_map_handle.borrow()) + seen_identifiers.try_register(&signature.name, &id, &self.source_map) .map_err(|line| format!("Duplicate function definition: {}. It's already defined at {}", signature.name, line))?; self.add_function_signature(signature, scope_name_stack)?; scope_name_stack.push(ScopeSegment{ @@ -217,20 +219,21 @@ impl SymbolTable { output? }, TypeDecl { name, body, mutable } => { - seen_identifiers.try_register(&name.name, &id, &self.source_map_handle.borrow()) + seen_identifiers.try_register(&name.name, &id, &self.source_map) .map_err(|line| format!("Duplicate type definition: {}. It's already defined at {}", name.name, line))?; self.add_type_decl(name, body, mutable, scope_name_stack)? }, Binding { name, .. } => { - seen_identifiers.try_register(&name, &id, &self.source_map_handle.borrow()) + seen_identifiers.try_register(&name, &id, &self.source_map) .map_err(|line| format!("Duplicate variable definition: {}. It's already defined at {}", name, line))?; self.add_new_symbol(name, scope_name_stack, SymbolSpec::Binding); } _ => () } }, - Statement { kind: StatementKind::Module(ModuleSpecifier { name, contents}), id } => { - seen_modules.try_register(&name, &id, &self.source_map_handle.borrow()) + Statement { kind: StatementKind::Module(ModuleSpecifier { name, contents}), id, location } => { + self.source_map.add_location(id, *location); + seen_modules.try_register(&name, &id, &self.source_map) .map_err(|line| format!("Duplicate module definition: {}. It's already defined at {}", name, line))?; scope_name_stack.push(ScopeSegment { name: name.clone() }); let output = self.add_symbols_from_scope(contents, scope_name_stack); diff --git a/schala-lang/language/src/symbol_table/test.rs b/schala-lang/language/src/symbol_table/test.rs index d6188e0..fda066e 100644 --- a/schala-lang/language/src/symbol_table/test.rs +++ b/schala-lang/language/src/symbol_table/test.rs @@ -1,14 +1,10 @@ #![cfg(test)] -use std::cell::RefCell; -use std::rc::Rc; - use super::*; use crate::util::quick_ast; fn add_symbols_from_source(src: &str) -> (SymbolTable, Result<(), String>) { - let (ast, source_map) = quick_ast(src); - let source_map = Rc::new(RefCell::new(source_map)); - let mut symbol_table = SymbolTable::new(source_map); + let ast = quick_ast(src); + let mut symbol_table = SymbolTable::new(); let result = symbol_table.add_top_level_symbols(&ast); (symbol_table, result) } diff --git a/schala-lang/language/src/tokenizing.rs b/schala-lang/language/src/tokenizing.rs index 782abfe..e5571fd 100644 --- a/schala-lang/language/src/tokenizing.rs +++ b/schala-lang/language/src/tokenizing.rs @@ -1,7 +1,20 @@ use itertools::Itertools; use std::{iter::{Iterator, Peekable}, convert::TryFrom, rc::Rc, fmt}; +use std::convert::TryInto; -use crate::source_map::Location; +pub type LineNumber = u32; + +#[derive(Debug, Clone, Copy, PartialEq, Default)] +pub struct Location { + pub(crate) line_num: LineNumber, + pub(crate) char_num: usize, +} + +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}:{}", self.line_num, self.char_num) + } +} #[derive(Debug, PartialEq, Clone)] pub enum TokenKind { @@ -95,7 +108,7 @@ impl TryFrom<&str> for Kw { #[derive(Debug, Clone, PartialEq)] pub struct Token { pub kind: TokenKind, - pub location: Location, + pub(crate) location: Location, } impl Token { @@ -171,7 +184,7 @@ pub fn tokenize(input: &str) -> Vec { c if is_operator(&c) => handle_operator(c, &mut input), unknown => Error(format!("Unexpected character: {}", unknown)), }; - let location = Location { line_num, char_num }; + let location = Location { line_num: line_num.try_into().unwrap(), char_num }; tokens.push(Token { kind: cur_tok_kind, location }); } tokens diff --git a/schala-lang/language/src/typechecking.rs b/schala-lang/language/src/typechecking.rs index 7665b59..802edbd 100644 --- a/schala-lang/language/src/typechecking.rs +++ b/schala-lang/language/src/typechecking.rs @@ -464,7 +464,7 @@ mod typechecking_tests { macro_rules! assert_type_in_fresh_context { ($string:expr, $type:expr) => { let mut tc = TypeContext::new(); - let (ref ast, _) = crate::util::quick_ast($string); + let ref ast = crate::util::quick_ast($string); let ty = tc.typecheck(ast).unwrap(); assert_eq!(ty, $type) } diff --git a/schala-lang/language/src/util.rs b/schala-lang/language/src/util.rs index ea0f906..80abf2d 100644 --- a/schala-lang/language/src/util.rs +++ b/schala-lang/language/src/util.rs @@ -48,18 +48,13 @@ impl<'a, T, V> ScopeStack<'a, T, V> where T: Hash + Eq { /// this is intended for use in tests, and does no error-handling whatsoever #[allow(dead_code)] -pub fn quick_ast(input: &str) -> (crate::ast::AST, crate::source_map::SourceMap) { - use std::cell::RefCell; - use std::rc::Rc; - - let source_map = crate::source_map::SourceMap::new(); - let source_map_handle = Rc::new(RefCell::new(source_map)); +pub fn quick_ast(input: &str) -> crate::ast::AST { let tokens = crate::tokenizing::tokenize(input); - let mut parser = crate::parsing::Parser::new(source_map_handle.clone()); + let mut parser = crate::parsing::Parser::new(); parser.add_new_tokens(tokens); let output = parser.parse(); std::mem::drop(parser); - (output.unwrap(), Rc::try_unwrap(source_map_handle).map_err(|_| ()).unwrap().into_inner()) + output.unwrap() } #[allow(unused_macros)]