First pass at putting EBNF grammar into rustdoc
This commit is contained in:
parent
a8b9f5046e
commit
6b47ecf2d7
@ -47,7 +47,7 @@ mod builtin;
|
|||||||
mod reduced_ast;
|
mod reduced_ast;
|
||||||
mod eval;
|
mod eval;
|
||||||
|
|
||||||
/// All bits of state necessary to parse and execute a Schala program are stored in this struct.
|
/// All the state necessary to parse and execute a Schala program is stored in this struct.
|
||||||
/// `state` represents the execution state for the AST-walking interpreter, the other fields
|
/// `state` represents the execution state for the AST-walking interpreter, the other fields
|
||||||
/// should be self-explanatory.
|
/// should be self-explanatory.
|
||||||
pub struct Schala {
|
pub struct Schala {
|
||||||
|
@ -1,3 +1,151 @@
|
|||||||
|
//! # Parsing
|
||||||
|
//! This module is where the recursive-descent parsing methods live.
|
||||||
|
//!
|
||||||
|
//!
|
||||||
|
//! # Schala EBNF Grammar
|
||||||
|
//! Terminal productions are in "double quotes" or UPPERCASE if they are a class of tokens,
|
||||||
|
//! or otherwise not representable in ASCII.
|
||||||
|
//!
|
||||||
|
//! ## Top level structure
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! program := (statement delimiter)* EOF
|
||||||
|
//! delimiter := NEWLINE | ";"
|
||||||
|
//! statement := expression | declaration
|
||||||
|
//! block := "{" (statement delimiter)* "}"
|
||||||
|
//! declaration := type_declaration | func_declaration | binding_declaration | impl_declaration
|
||||||
|
//! ```
|
||||||
|
//! ## Declarations - Types
|
||||||
|
//! ```text
|
||||||
|
//! type_declaration := "type" type_declaration_body
|
||||||
|
//! type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body
|
||||||
|
//! type_alias := IDENTIFIER "=" type_name
|
||||||
|
//! type_body := variant_specifier ("|" variant_specifier)*
|
||||||
|
//! variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")"
|
||||||
|
//! typed_identifier_list := typed_identifier*
|
||||||
|
//! typed_identifier := IDENTIFIER type_anno
|
||||||
|
//! ```
|
||||||
|
//! ## Declaration - Functions
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! func_declaration := func_signature func_body
|
||||||
|
//! func_body := ε | nonempty_func_body
|
||||||
|
//! nonempty_func_body := "{" (statement delimiter)* "}"
|
||||||
|
//! func_signature := "fn" func_name formal_param_list type_anno+
|
||||||
|
//! func_name := IDENTIFIER | operator
|
||||||
|
//! formal_param_list := "(" (formal_param ",")* ")"
|
||||||
|
//! formal_param := IDENTIFIER type_anno+
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## Declaration - Variable bindings
|
||||||
|
//! ```binding_declaration := "let" "mut"? IDENTIFIER "=" expression```
|
||||||
|
//!
|
||||||
|
//! ## Declaration - Interface
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! interface_declaration := "interface" type_singleton_name signature_block
|
||||||
|
//! impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block
|
||||||
|
//! decl_block := "{" (func_declaration)* "}"
|
||||||
|
//! signature_block := "{" (func_signature)* "}"
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## Type Annotations
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! type_anno := (":" type_name)+
|
||||||
|
//! type_name := type_singleton_name | "(" type_names ")"
|
||||||
|
//! type_names := ε | type_name (, type_name)*
|
||||||
|
//! type_singleton_name := IDENTIFIER (type_params)*
|
||||||
|
//! type_params := "<" type_name (, type_name)* ">"
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## Expressions
|
||||||
|
//! ```text
|
||||||
|
//! expression := precedence_expr type_anno+
|
||||||
|
//! precedence_expr := prefix_expr
|
||||||
|
//! prefix_expr := prefix_op call_expr
|
||||||
|
//! prefix_op := "+" | "-" | "!" | "~"
|
||||||
|
//! call_expr := index_expr ( "(" expr_list ")" )* | ε
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! expr_list := expression ("," expression)* | ε
|
||||||
|
//! index_expr := primary ( "[" (expression ("," (expression)* | ε) "]" )*
|
||||||
|
//! primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## Primary expressions
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! list_expr := "[" (expression, ",")* "]"
|
||||||
|
//! lambda_expr := "\\" lambda_param_list type_anno+ nonempty_func_body
|
||||||
|
//! lambda_param_list := formal_param_list | formal_param
|
||||||
|
//! paren_expr := LParen paren_inner RParen
|
||||||
|
//! paren_inner := (expression ",")*
|
||||||
|
//! identifier_expr := named_struct | IDENTIFIER
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## Expression literals
|
||||||
|
//! ```text
|
||||||
|
//! literal := "true" | "false" | number_literal | STR_LITERAL
|
||||||
|
//! named_struct := IDENTIFIER record_block
|
||||||
|
//! record_block := "{" (record_entry, ",")* | "}" //TODO support anonymous structs, update syntax
|
||||||
|
//! record_entry := IDENTIFIER ":" expression
|
||||||
|
//! anonymous_struct := TODO
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! A `float_literal` can still be assigned to an int in type-checking
|
||||||
|
//! ```text
|
||||||
|
//! number_literal := int_literal | float_literal
|
||||||
|
//! int_literal := ("0x" | "0b") digits
|
||||||
|
//! float_literal := digits ("." digits)
|
||||||
|
//! digits := (DIGIT_GROUP underscore)+
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## Patterns
|
||||||
|
//! ```text
|
||||||
|
//! pattern := "(" (pattern, ",")* ")" | simple_pattern
|
||||||
|
//! simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern
|
||||||
|
//! pattern_literal := "true" | "false" | signed_number_literal | STR_LITERAL | IDENTIFIER
|
||||||
|
//! signed_number_literal := "-"? number_literal
|
||||||
|
//! record_pattern := IDENTIFIER "{" (record_pattern_entry, ",")* "}"
|
||||||
|
//! record_pattern_entry := IDENTIFIER | IDENTIFIER ":" pattern
|
||||||
|
//! tuple_struct_pattern := IDENTIFIER "(" (pattern, ",")* ")"
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! expr_or_block := "{" (statement delimiter)* "}" | expression
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## If-expressions
|
||||||
|
//! ```text
|
||||||
|
//! if_expr := "if" discriminator ("then" conditional | "is" simple_pattern_match | guard_block)
|
||||||
|
//! discriminator := precedence_expr (operator)+
|
||||||
|
//! conditional := expr_or_block else_clause
|
||||||
|
//! simple_pattern_match := pattern "then" conditional
|
||||||
|
//! else_clause := ε | "else" expr_or_block
|
||||||
|
//! guard_block := "{" (guard_arm, ",")* "}"
|
||||||
|
//! guard_arm := guard "->" expr_or_block
|
||||||
|
//! guard := "is" pattern | (operator)+ precedence_expr
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## While expressions
|
||||||
|
//! ```text
|
||||||
|
//! while_expr := "while" while_cond "{" (statement delimiter)* "}"
|
||||||
|
//! while_cond := ε | expression | expression "is" pattern //TODO maybe is-expressions should be primary
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! //TODO this implies there must be at least one enumerator, which the parser doesn't support right
|
||||||
|
//! //this second, and maybe should fail later anyway
|
||||||
|
//! ## For-expressions
|
||||||
|
//! ```text
|
||||||
|
//! for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body
|
||||||
|
//! for_expr_body := "return" expression | "{" (statement delimiter)* "}"
|
||||||
|
//! enumerators := enumerator ("," enumerators)*
|
||||||
|
//! enumerator := IDENTIFIER "<-" expression | IDENTIFIER "=" expression //TODO add guards, etc.
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
use std::vec::IntoIter;
|
use std::vec::IntoIter;
|
||||||
@ -10,6 +158,7 @@ use crate::ast::*;
|
|||||||
|
|
||||||
use crate::builtin::{BinOp, PrefixOp};
|
use crate::builtin::{BinOp, PrefixOp};
|
||||||
|
|
||||||
|
/// Represents a parsing error
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ParseError {
|
pub struct ParseError {
|
||||||
pub msg: String,
|
pub msg: String,
|
||||||
@ -22,6 +171,7 @@ impl ParseError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Represents either a successful parsing result or a ParseError
|
||||||
pub type ParseResult<T> = Result<T, ParseError>;
|
pub type ParseResult<T> = Result<T, ParseError>;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -31,6 +181,7 @@ pub struct ParseRecord {
|
|||||||
level: u32,
|
level: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Main data structure for doing parsing.
|
||||||
pub struct Parser {
|
pub struct Parser {
|
||||||
token_handler: TokenHandler,
|
token_handler: TokenHandler,
|
||||||
parse_record: Vec<ParseRecord>,
|
parse_record: Vec<ParseRecord>,
|
||||||
@ -69,6 +220,7 @@ impl TokenHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Parser {
|
impl Parser {
|
||||||
|
/// Create a new parser initialized with some tokens.
|
||||||
pub fn new(initial_input: Vec<Token>) -> Parser {
|
pub fn new(initial_input: Vec<Token>) -> Parser {
|
||||||
Parser {
|
Parser {
|
||||||
token_handler: TokenHandler::new(initial_input),
|
token_handler: TokenHandler::new(initial_input),
|
||||||
@ -78,6 +230,7 @@ impl Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse all loaded tokens up to this point.
|
||||||
pub fn parse(&mut self) -> ParseResult<AST> {
|
pub fn parse(&mut self) -> ParseResult<AST> {
|
||||||
self.program()
|
self.program()
|
||||||
}
|
}
|
||||||
@ -155,128 +308,10 @@ macro_rules! delimited {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Schala EBNF Grammar */
|
|
||||||
/* Terminal productions are in 'single quotes' or UPPERCASE if they are a class
|
|
||||||
* or not representable in ASCII
|
|
||||||
|
|
||||||
/* Top-level Structure */
|
|
||||||
|
|
||||||
program := (statement delimiter)* EOF
|
|
||||||
delimiter := NEWLINE | ';'
|
|
||||||
statement := expression | declaration
|
|
||||||
block := '{' (statement delimiter)* '}'
|
|
||||||
|
|
||||||
declaration := type_declaration | func_declaration | binding_declaration | impl_declaration
|
|
||||||
|
|
||||||
/* Declarations - Types */
|
|
||||||
|
|
||||||
type_declaration := 'type' type_declaration_body
|
|
||||||
type_declaration_body := 'alias' type_alias | 'mut'? type_singleton_name '=' type_body
|
|
||||||
type_alias := IDENTIFIER '=' type_name
|
|
||||||
type_body := variant_specifier ('|' variant_specifier)*
|
|
||||||
variant_specifier := IDENTIFIER | IDENTIFIER '{' typed_identifier_list '}' | IDENTIFIER '(' type_name* ')'
|
|
||||||
typed_identifier_list := typed_identifier*
|
|
||||||
typed_identifier := IDENTIFIER type_anno
|
|
||||||
|
|
||||||
/* Declaration - Functions */
|
|
||||||
|
|
||||||
func_declaration := func_signature func_body
|
|
||||||
func_body := ε | nonempty_func_body
|
|
||||||
nonempty_func_body := '{' (statement delimiter)* '}'
|
|
||||||
func_signature := 'fn' func_name formal_param_list type_anno+
|
|
||||||
func_name := IDENTIFIER | operator
|
|
||||||
formal_param_list := '(' (formal_param ',')* ')'
|
|
||||||
formal_param := IDENTIFIER type_anno+
|
|
||||||
|
|
||||||
/* Declaration - Variable bindings */
|
|
||||||
binding_declaration := 'let' 'mut'? IDENTIFIER '=' expresion
|
|
||||||
|
|
||||||
/* Declaration - Interface */
|
|
||||||
|
|
||||||
interface_declaration := 'interface' type_singleton_name signature_block
|
|
||||||
impl_declaration := 'impl' type_singleton_name decl_block | 'impl' type_singleton_name 'for' type_name decl_block
|
|
||||||
decl_block := '{' (func_declaration)* '}'
|
|
||||||
signature_block := '{' (func_signature)* '}'
|
|
||||||
|
|
||||||
/* Type annotations */
|
|
||||||
|
|
||||||
type_anno := (':' type_name)+
|
|
||||||
type_name := type_singleton_name | '(' type_names ')'
|
|
||||||
type_names := ε | type_name (, type_name)*
|
|
||||||
type_singleton_name = IDENTIFIER (type_params)*
|
|
||||||
type_params := '<' type_name (, type_name)* '>'
|
|
||||||
|
|
||||||
|
|
||||||
/* Expressions */
|
|
||||||
|
|
||||||
expression := precedence_expr type_anno+
|
|
||||||
precedence_expr := prefix_expr
|
|
||||||
prefix_expr := prefix_op call_expr
|
|
||||||
prefix_op := '+' | '-' | '!' | '~'
|
|
||||||
call_expr := index_expr ( '(' expr_list ')' )* | ε
|
|
||||||
|
|
||||||
expr_list := expression (',' expression)* | ε
|
|
||||||
index_expr := primary ( '[' (expression (',' (expression)* | ε) ']' )*
|
|
||||||
primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
|
|
||||||
|
|
||||||
/* Primary Expressions */
|
|
||||||
|
|
||||||
list_expr := '[' (expression, ',')* ']'
|
|
||||||
lambda_expr := '\' lambda_param_list type_anno+ nonempty_func_body
|
|
||||||
lambda_param_list := formal_param_list | formal_param
|
|
||||||
paren_expr := LParen paren_inner RParen
|
|
||||||
paren_inner := (expression ',')*
|
|
||||||
identifier_expr := named_struct | IDENTIFIER
|
|
||||||
|
|
||||||
/* Expression - Literals */
|
|
||||||
|
|
||||||
literal := 'true' | 'false' | number_literal | STR_LITERAL
|
|
||||||
named_struct := IDENTIFIER record_block
|
|
||||||
record_block := '{' (record_entry, ',')* | '}' //TODO support anonymus structs, update syntax
|
|
||||||
record_entry := IDENTIFIER ':' expression
|
|
||||||
anonymous_struct := TODO
|
|
||||||
|
|
||||||
// a float_literal can still be assigned to an int in type-checking
|
|
||||||
number_literal := int_literal | float_literal
|
|
||||||
int_literal = ('0x' | '0b') digits
|
|
||||||
float_literal := digits ('.' digits)
|
|
||||||
digits := (DIGIT_GROUP underscore)+
|
|
||||||
|
|
||||||
/* Pattern syntax */
|
|
||||||
pattern := '(' (pattern, ',')* ')' | simple_pattern
|
|
||||||
simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern
|
|
||||||
pattern_literal := 'true' | 'false' | signed_number_literal | STR_LITERAL | IDENTIFIER
|
|
||||||
signed_number_literal := '-'? number_literal
|
|
||||||
record_pattern := IDENTIFIER '{' (record_pattern_entry, ',')* '}'
|
|
||||||
record_pattern_entry := IDENTIFIER | IDENTIFIER ':' Pattern
|
|
||||||
tuple_struct_pattern := IDENTIFIER '(' (pattern, ',')* ')'
|
|
||||||
|
|
||||||
expr_or_block := '{' (statement delimiter)* '}' | expr
|
|
||||||
|
|
||||||
/* Expression - If */
|
|
||||||
if_expr := 'if' discriminator ('then' condititional | 'is' simple_pattern_match | guard_block)
|
|
||||||
discriminator := precedence_expr (operator)+
|
|
||||||
conditional := expr_or_block else_clause
|
|
||||||
simple_pattern_match := pattern 'then' conditional
|
|
||||||
else_clause := ε | 'else' expr_or_block
|
|
||||||
guard_block := '{' (guard_arm, ',')* '}'
|
|
||||||
guard_arm := guard '->' expr_or_block
|
|
||||||
guard := 'is' pattern | (operator)+ precedence_expr
|
|
||||||
|
|
||||||
/* Expression - While */
|
|
||||||
while_expr := 'while' while_cond '{' (statement delimiter)* '}'
|
|
||||||
while_cond := ε | expression | expression 'is' pattern //TODO maybe is-expresions should be primary
|
|
||||||
|
|
||||||
//TODO this implies there must be at least one enumerator, which the parser doesn't support right
|
|
||||||
//this second, and maybe should fail later anyway
|
|
||||||
/* Expression - For */
|
|
||||||
for_expr := 'for' (enumerator | '{' enumerators '}') for_expr_body
|
|
||||||
for_expr_body := 'return' expression | '{' (statement delimiter)* '}
|
|
||||||
enumerators := enumerator (',' enumerators)*
|
|
||||||
enumerator := identifier '<-' expression | identifier '=' expression //TODO add guards, etc.
|
|
||||||
*/
|
|
||||||
|
|
||||||
impl Parser {
|
impl Parser {
|
||||||
|
/// `program := (statement delimiter)* EOF`
|
||||||
|
/// `delimiter := NEWLINE | ';'`
|
||||||
#[recursive_descent_method]
|
#[recursive_descent_method]
|
||||||
fn program(&mut self) -> ParseResult<AST> {
|
fn program(&mut self) -> ParseResult<AST> {
|
||||||
let mut statements = Vec::new();
|
let mut statements = Vec::new();
|
||||||
@ -295,6 +330,7 @@ impl Parser {
|
|||||||
Ok(AST(statements))
|
Ok(AST(statements))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `statement := expression | declaration`
|
||||||
#[recursive_descent_method]
|
#[recursive_descent_method]
|
||||||
fn statement(&mut self) -> ParseResult<Statement> {
|
fn statement(&mut self) -> ParseResult<Statement> {
|
||||||
//TODO handle error recovery here
|
//TODO handle error recovery here
|
||||||
|
Loading…
Reference in New Issue
Block a user