Delete a bunch of now-obsolete parsing/tokenizing code

Greg Shuflin 2021-11-14 03:18:05 -08:00
parent 05e1555a9b
commit 94ee3e1897
12 changed files with 21 additions and 1782 deletions

View File

@@ -17,7 +17,7 @@ pub use visitor::*;
 use crate::{
     derivative::Derivative,
     identifier::{define_id_kind, Id},
-    tokenizing::Location,
+    parsing::Location,
 };

 define_id_kind!(ASTItem);
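This one-line import change repeats across most of the files below: everything that used `tokenizing::Location` now takes it from `parsing` instead. Judging from the definition in the deleted tokenizing.rs at the bottom of this page, the type is a plain character-offset wrapper; assuming it was moved into the parsing module essentially unchanged (presumably as part of the large suppressed diff below), the relocated type would look like this sketch:

    // Sketch of parsing::Location, assuming the definition was moved
    // over verbatim from the deleted tokenizing.rs rather than rewritten.
    #[derive(Debug, Clone, Copy, PartialEq, Default)]
    pub struct Location {
        pub(crate) offset: usize,
    }

    impl From<usize> for Location {
        // Built from a raw character offset, which is how the parser
        // diff below uses it: `location: err.location.offset.into()`.
        fn from(offset: usize) -> Self {
            Self { offset }
        }
    }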

View File

@@ -1,7 +1,5 @@
 use std::rc::Rc;

-use crate::tokenizing::TokenKind;
-
 #[derive(Debug, PartialEq, Clone)]
 pub struct PrefixOp {
     sigil: Rc<String>,
@@ -15,10 +13,6 @@
     pub fn sigil(&self) -> &str {
         &self.sigil
     }
-
-    pub fn is_prefix(op: &str) -> bool {
-        matches!(op, "+" | "-" | "!")
-    }
 }

 #[derive(Debug, PartialEq, Clone)]
@@ -35,38 +29,14 @@
         &self.sigil
     }

-    pub fn from_sigil_token(tok: &TokenKind) -> Option<BinOp> {
-        let s = token_kind_to_sigil(tok)?;
-        Some(BinOp::from_sigil(s))
-    }
-
     pub fn min_precedence() -> i32 {
         i32::min_value()
     }

-    pub fn get_precedence_from_token(op_tok: &TokenKind) -> Option<i32> {
-        let s = token_kind_to_sigil(op_tok)?;
-        Some(binop_precedences(s))
-    }
-
     pub fn get_precedence(&self) -> i32 {
         binop_precedences(self.sigil.as_ref())
     }
 }
-
-fn token_kind_to_sigil(tok: &TokenKind) -> Option<&str> {
-    use self::TokenKind::*;
-    Some(match tok {
-        Operator(op) => op.as_str(),
-        Period => ".",
-        Pipe => "|",
-        Slash => "/",
-        LAngleBracket => "<",
-        RAngleBracket => ">",
-        Equals => "=",
-        _ => return None,
-    })
-}
 fn binop_precedences(s: &str) -> i32 {
     let default = 10_000_000;
     match s {

View File

@@ -1,8 +1,7 @@
 use crate::{
-    parsing::ParseError,
+    parsing::{Location, ParseError},
     schala::{SourceReference, Stage},
     symbol_table::SymbolError,
-    tokenizing::{Location, Token, TokenKind},
     type_inference::TypeError,
 };

@@ -52,26 +51,6 @@ impl SchalaError {
             errors: vec![],
         }
     }
-
-    pub(crate) fn from_tokens(tokens: &[Token]) -> Option<SchalaError> {
-        let token_errors: Vec<Error> = tokens
-            .iter()
-            .filter_map(|tok| match tok.kind {
-                TokenKind::Error(ref err) => Some(Error {
-                    location: Some(tok.location),
-                    text: Some(err.clone()),
-                    stage: Stage::Tokenizing,
-                }),
-                _ => None,
-            })
-            .collect();
-
-        if token_errors.is_empty() {
-            None
-        } else {
-            Some(SchalaError { errors: token_errors, formatted_parse_error: None })
-        }
-    }
 }

 #[allow(dead_code)]

View File

@@ -7,7 +7,6 @@
 //! `ProgrammingLanguageInterface` and the chain of compiler passes for it.

 extern crate schala_repl;
-#[macro_use]
 extern crate schala_lang_codegen;
 extern crate derivative;

@@ -19,7 +18,6 @@ mod type_inference;
 mod ast;
 mod parsing;
-mod tokenizing;
 #[macro_use]
 mod symbol_table;
 mod builtin;

File diff suppressed because it is too large

View File

@@ -6,7 +6,6 @@ use crate::{
     ast::*,
     identifier::{Id, IdStore},
     parsing::ParseError,
-    schala::SourceReference,
 };

 fn rc_string(s: &str) -> Rc<String> {
@@ -30,10 +29,6 @@ impl Parser {
             ParseError {
                 msg,
                 location: err.location.offset.into(),
-                token: crate::tokenizing::Token {
-                    kind: crate::tokenizing::TokenKind::Semicolon,
-                    location: Default::default(),
-                },
             }
         })
     }

View File

@@ -7,7 +7,7 @@ use std::{fmt::Write, rc::Rc};
 use pretty_assertions::assert_eq;

 use super::new::{schala_parser, Parser};
-use crate::{ast::*, tokenizing::Location};
+use crate::{ast::*, parsing::Location};

 fn rc(s: &str) -> Rc<String> {
     Rc::new(s.to_owned())

View File

@@ -5,7 +5,7 @@ use schala_repl::{
 use stopwatch::Stopwatch;

 use crate::{
-    error::SchalaError, parsing, reduced_ir, symbol_table, tokenizing, tree_walk_eval, type_inference,
+    error::SchalaError, parsing, reduced_ir, symbol_table, tree_walk_eval, type_inference,
 };

 /// All the state necessary to parse and execute a Schala program is stored in this struct.
@@ -158,7 +158,6 @@ impl SourceReference {
 #[allow(dead_code)]
 #[derive(Clone, Copy, Debug)]
 pub(crate) enum Stage {
-    Tokenizing,
     Parsing,
     Symbols,
     ScopeResolution,
@@ -168,7 +167,7 @@
 }

 fn stage_names() -> Vec<&'static str> {
-    vec!["tokenizing", "parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
+    vec!["parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
 }

 #[derive(Default, Clone)]

View File

@@ -10,7 +10,7 @@ use crate::{
     ast,
     ast::ItemId,
     builtin::Builtin,
-    tokenizing::Location,
+    parsing::Location,
     type_inference::{TypeContext, TypeId},
 };

View File

@@ -11,7 +11,7 @@ use crate::{
         TypeSingletonName, Variant, VariantKind, AST,
     },
     builtin::Builtin,
-    tokenizing::Location,
+    parsing::Location,
     type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder},
 };

View File

@@ -2,7 +2,7 @@
 use assert_matches::assert_matches;

 use super::*;
-use crate::{tokenizing::Location, util::quick_ast};
+use crate::util::quick_ast;

 fn add_symbols(src: &str) -> (SymbolTable, Result<(), Vec<SymbolError>>) {
     let ast = quick_ast(src);
@@ -79,7 +79,7 @@ fn no_type_definition_duplicates() {
     let err = &errs[0];
     match err {
-        SymbolError::DuplicateName { location, prev_name } => {
+        SymbolError::DuplicateName { location: _, prev_name } => {
             assert_eq!(prev_name, &Fqsn::from_strs(&["Food"]));
             //TODO restore this Location test

View File

@@ -1,464 +0,0 @@
-#![allow(clippy::upper_case_acronyms)]
-
-use std::{
-    convert::{TryFrom, TryInto},
-    fmt,
-    iter::{Iterator, Peekable},
-    rc::Rc,
-};
-
-use itertools::Itertools;
-
-/// A location in a particular source file, stored as a
-/// character offset from the start of that file.
-#[derive(Debug, Clone, Copy, PartialEq, Default)]
-pub struct Location {
-    pub(crate) offset: usize,
-}
-
-impl From<usize> for Location {
-    fn from(offset: usize) -> Self {
-        Self { offset }
-    }
-}
-
-impl fmt::Display for Location {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}", self.offset)
-    }
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub enum TokenKind {
-    Newline,
-    Semicolon,
-    LParen,
-    RParen,
-    LSquareBracket,
-    RSquareBracket,
-    LAngleBracket,
-    RAngleBracket,
-    LCurlyBrace,
-    RCurlyBrace,
-    Pipe,
-    Backslash,
-    AtSign,
-    Comma,
-    Period,
-    Colon,
-    Underscore,
-    Slash,
-    Equals,
-    Operator(Rc<String>),
-    DigitGroup(Rc<String>),
-    HexLiteral(Rc<String>),
-    BinNumberSigil,
-    StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
-    Identifier(Rc<String>),
-    Keyword(Kw),
-    EOF,
-    Error(String),
-}
-
-use self::TokenKind::*;
-
-impl fmt::Display for TokenKind {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            &Operator(ref s) => write!(f, "Operator({})", **s),
-            &DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
-            &HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
-            &StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s),
-            &Identifier(ref s) => write!(f, "Identifier({})", s),
-            &Error(ref s) => write!(f, "Error({})", s),
-            other => write!(f, "{:?}", other),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum Kw {
-    If,
-    Then,
-    Else,
-    Is,
-    Func,
-    For,
-    While,
-    Let,
-    In,
-    Mut,
-    Return,
-    Continue,
-    Break,
-    Alias,
-    Type,
-    SelfType,
-    SelfIdent,
-    Interface,
-    Impl,
-    True,
-    False,
-    Module,
-    Import,
-}
-
-impl TryFrom<&str> for Kw {
-    type Error = ();
-
-    fn try_from(value: &str) -> Result<Self, Self::Error> {
-        Ok(match value {
-            "if" => Kw::If,
-            "then" => Kw::Then,
-            "else" => Kw::Else,
-            "is" => Kw::Is,
-            "fn" => Kw::Func,
-            "for" => Kw::For,
-            "while" => Kw::While,
-            "let" => Kw::Let,
-            "in" => Kw::In,
-            "mut" => Kw::Mut,
-            "return" => Kw::Return,
-            "break" => Kw::Break,
-            "continue" => Kw::Continue,
-            "alias" => Kw::Alias,
-            "type" => Kw::Type,
-            "Self" => Kw::SelfType,
-            "self" => Kw::SelfIdent,
-            "interface" => Kw::Interface,
-            "impl" => Kw::Impl,
-            "true" => Kw::True,
-            "false" => Kw::False,
-            "module" => Kw::Module,
-            "import" => Kw::Import,
-            _ => return Err(()),
-        })
-    }
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct Token {
-    pub kind: TokenKind,
-    pub(crate) location: Location,
-}
-
-impl Token {
-    pub fn to_string_with_metadata(&self) -> String {
-        format!("{}({})", self.kind, self.location)
-    }
-
-    pub fn get_kind(&self) -> TokenKind {
-        self.kind.clone()
-    }
-}
-
-const OPERATOR_CHARS: [char; 17] =
-    ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
-
-fn is_operator(c: &char) -> bool {
-    OPERATOR_CHARS.iter().any(|x| x == c)
-}
-
-type CharData = (usize, usize, char);
-
-pub fn tokenize(input: &str) -> Vec<Token> {
-    let mut tokens: Vec<Token> = Vec::new();
-
-    let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
-        .flat_map(|(line_idx, line)| {
-            line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch))
-        })
-        .peekable();
-
-    while let Some((line_num, char_num, c)) = input.next() {
-        let cur_tok_kind = match c {
-            '/' => match input.peek().map(|t| t.2) {
-                Some('/') => {
-                    for (_, _, c) in input.by_ref() {
-                        if c == '\n' {
-                            break;
-                        }
-                    }
-                    continue;
-                }
-                Some('*') => {
-                    input.next();
-                    let mut comment_level = 1;
-                    while let Some((_, _, c)) = input.next() {
-                        if c == '*' && input.peek().map(|t| t.2) == Some('/') {
-                            input.next();
-                            comment_level -= 1;
-                        } else if c == '/' && input.peek().map(|t| t.2) == Some('*') {
-                            input.next();
-                            comment_level += 1;
-                        }
-                        if comment_level == 0 {
-                            break;
-                        }
-                    }
-                    if comment_level != 0 {
-                        Error("Unclosed comment".to_string())
-                    } else {
-                        continue;
-                    }
-                }
-                _ => Slash,
-            },
-            c if c.is_whitespace() && c != '\n' => continue,
-            '\n' => Newline,
-            ';' => Semicolon,
-            ':' => Colon,
-            ',' => Comma,
-            '(' => LParen,
-            ')' => RParen,
-            '{' => LCurlyBrace,
-            '}' => RCurlyBrace,
-            '[' => LSquareBracket,
-            ']' => RSquareBracket,
-            '"' => handle_quote(&mut input, None),
-            '\\' => Backslash,
-            '@' => AtSign,
-            c if c.is_digit(10) => handle_digit(c, &mut input),
-            c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input),
-            c if is_operator(&c) => handle_operator(c, &mut input),
-            unknown => Error(format!("Unexpected character: {}", unknown)),
-        };
-        let location = Location { offset: 0 };
-        tokens.push(Token { kind: cur_tok_kind, location });
-    }
-    tokens
-}
-
-fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
-    let next_ch = input.peek().map(|&(_, _, c)| c);
-
-    if c == '0' && next_ch == Some('x') {
-        input.next();
-        let rest: String = input
-            .peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
-            .map(|(_, _, c)| c)
-            .collect();
-        HexLiteral(Rc::new(rest))
-    } else if c == '0' && next_ch == Some('b') {
-        input.next();
-        BinNumberSigil
-    } else {
-        let mut buf = c.to_string();
-        buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
-        DigitGroup(Rc::new(buf))
-    }
-}
-
-fn handle_quote(
-    input: &mut Peekable<impl Iterator<Item = CharData>>,
-    quote_prefix: Option<&str>,
-) -> TokenKind {
-    let mut buf = String::new();
-    loop {
-        match input.next().map(|(_, _, c)| c) {
-            Some('"') => break,
-            Some('\\') => {
-                let next = input.peek().map(|&(_, _, c)| c);
-                if next == Some('n') {
-                    input.next();
-                    buf.push('\n')
-                } else if next == Some('"') {
-                    input.next();
-                    buf.push('"');
-                } else if next == Some('t') {
-                    input.next();
-                    buf.push('\t');
-                }
-            }
-            Some(c) => buf.push(c),
-            None => return TokenKind::Error("Unclosed string".to_string()),
-        }
-    }
-    TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
-}
-
-fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
-    let mut buf = String::new();
-    buf.push(c);
-    let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
-    if c == '_' && next_is_alphabetic {
-        return TokenKind::Underscore;
-    }
-
-    loop {
-        match input.peek().map(|&(_, _, c)| c) {
-            Some(c) if c == '"' => {
-                input.next();
-                return handle_quote(input, Some(&buf));
-            }
-            Some(c) if c.is_alphanumeric() || c == '_' => {
-                input.next();
-                buf.push(c);
-            }
-            _ => break,
-        }
-    }
-
-    match Kw::try_from(buf.as_str()) {
-        Ok(kw) => TokenKind::Keyword(kw),
-        Err(()) => TokenKind::Identifier(Rc::new(buf)),
-    }
-}
-
-fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
-    match c {
-        '<' | '>' | '|' | '.' | '=' => {
-            let next = &input.peek().map(|&(_, _, c)| c);
-            let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
-            if !next_is_op {
-                return match c {
-                    '<' => LAngleBracket,
-                    '>' => RAngleBracket,
-                    '|' => Pipe,
-                    '.' => Period,
-                    '=' => Equals,
-                    _ => unreachable!(),
-                };
-            }
-        }
-        _ => (),
-    };
-
-    let mut buf = String::new();
-
-    if c == '`' {
-        loop {
-            match input.peek().map(|&(_, _, c)| c) {
-                Some(c) if c.is_alphabetic() || c == '_' => {
-                    input.next();
-                    buf.push(c);
-                }
-                Some('`') => {
-                    input.next();
-                    break;
-                }
-                _ => break,
-            }
-        }
-    } else {
-        buf.push(c);
-        loop {
-            match input.peek().map(|&(_, _, c)| c) {
-                Some(c) if is_operator(&c) => {
-                    input.next();
-                    buf.push(c);
-                }
-                _ => break,
-            }
-        }
-    }
-    TokenKind::Operator(Rc::new(buf))
-}
-
-#[cfg(test)]
-mod schala_tokenizer_tests {
-    use super::{Kw::*, *};
-
-    macro_rules! digit {
-        ($ident:expr) => {
-            DigitGroup(Rc::new($ident.to_string()))
-        };
-    }
-    macro_rules! ident {
-        ($ident:expr) => {
-            Identifier(Rc::new($ident.to_string()))
-        };
-    }
-    macro_rules! op {
-        ($ident:expr) => {
-            Operator(Rc::new($ident.to_string()))
-        };
-    }
-
-    fn token_kinds(input: &str) -> Vec<TokenKind> {
-        tokenize(input).into_iter().map(move |tok| tok.kind).collect()
-    }
-
-    #[test]
-    fn tokens() {
-        let output = token_kinds("let a: A<B> = c ++ d");
-        assert_eq!(
-            output,
-            vec![
-                Keyword(Let),
-                ident!("a"),
-                Colon,
-                ident!("A"),
-                LAngleBracket,
-                ident!("B"),
-                RAngleBracket,
-                Equals,
-                ident!("c"),
-                op!("++"),
-                ident!("d")
-            ]
-        );
-    }
-
-    #[test]
-    fn underscores() {
-        let output = token_kinds("4_8");
-        assert_eq!(output, vec![digit!("4"), Underscore, digit!("8")]);
-
-        let output = token_kinds("aba_yo");
-        assert_eq!(output, vec![ident!("aba_yo")]);
-    }
-
-    #[test]
-    fn comments() {
-        let output = token_kinds("1 + /* hella /* bro */ */ 2");
-        assert_eq!(output, vec![digit!("1"), op!("+"), digit!("2")]);
-
-        let output = token_kinds("1 + /* hella /* bro */ 2");
-        assert_eq!(output, vec![digit!("1"), op!("+"), Error("Unclosed comment".to_string())]);
-
-        //TODO not sure if I want this behavior
-        let output = token_kinds("1 + /* hella */ bro */ 2");
-        assert_eq!(
-            output,
-            vec![
-                digit!("1"),
-                op!("+"),
-                Identifier(Rc::new("bro".to_string())),
-                Operator(Rc::new("*".to_string())),
-                Slash,
-                DigitGroup(Rc::new("2".to_string()))
-            ]
-        );
-    }
-
-    #[test]
-    fn backtick_operators() {
-        let output = token_kinds("1 `plus` 2");
-        assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]);
-    }
-
-    #[test]
-    fn string_literals() {
-        let output = token_kinds(r#""some string""#);
-        assert_eq!(output, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]);
-
-        let output = token_kinds(r#"b"some bytestring""#);
-        assert_eq!(
-            output,
-            vec![StrLiteral {
-                s: Rc::new("some bytestring".to_string()),
-                prefix: Some(Rc::new("b".to_string()))
-            }]
-        );
-
-        let output = token_kinds(r#""Do \n \" escapes work\t""#);
-        assert_eq!(
-            output,
-            vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]
-        );
-    }
-}