From f15427e5d9ea4f9c317509dfb1aa35efb3ae6170 Mon Sep 17 00:00:00 2001
From: greg
Date: Wed, 6 Sep 2017 05:09:20 -0700
Subject: [PATCH] A bunch of token stuff

---
 src/schala_lang/mod.rs     |   4 +-
 src/schala_lang/parsing.rs | 112 +++++++++++++++++++++++++++++++------
 2 files changed, 96 insertions(+), 20 deletions(-)

diff --git a/src/schala_lang/mod.rs b/src/schala_lang/mod.rs
index 9334f5c..261f1de 100644
--- a/src/schala_lang/mod.rs
+++ b/src/schala_lang/mod.rs
@@ -31,6 +31,7 @@ impl ProgrammingLanguageInterface for Schala {
       }
     };
 
+    /*
     let ast = match parsing::parse(tokens) {
       Ok(ast) => {
         if options.debug_parse {
@@ -43,8 +44,9 @@ impl ProgrammingLanguageInterface for Schala {
         return output;
       }
     };
+    */
 
-    let evaluation_output = format!("test eval");
+    let evaluation_output = format!("{:?}", tokens);
     output.add_output(evaluation_output);
     return output;
   }
diff --git a/src/schala_lang/parsing.rs b/src/schala_lang/parsing.rs
index acf82e8..d72eb60 100644
--- a/src/schala_lang/parsing.rs
+++ b/src/schala_lang/parsing.rs
@@ -1,29 +1,30 @@
+extern crate itertools;
+
 use language::{TokenError, ParseError};
 use std::rc::Rc;
+use std::iter::{Enumerate, Peekable};
+use self::itertools::Itertools;
+use std::str::Chars;
 
 #[allow(dead_code)]
 #[derive(Debug)]
 pub enum TokenType {
-  Newline,
-  Semicolon,
+  Newline, Semicolon,
 
-  LParen,
-  RParen,
+  LParen, RParen,
+  LSquareBracket, RSquareBracket,
+  LAngleBracket, RAngleBracket,
+  LCurlyBrace, RCurlyBrace,
 
-  LSquareBracket,
-  RSquareBracket,
+  Comma, Period, Colon, Underscore,
 
-  LCurlyBrace,
-  RCurlyBrace,
-
-  Comma,
-  Period,
-  Colon,
-  Digit(u8),
+  Operator(Rc<String>),
+  DigitGroup(Rc<String>), HexNumberSigil, BinNumberSigil,
   StrLiteral(Rc<String>),
   Identifier(Rc<String>),
   Keyword(Kw),
-  Operator(Rc<String>),
+
+  Error(String),
 }
 
 #[derive(Debug)]
@@ -31,18 +32,92 @@ pub enum Kw {
   If,
   Else,
   Func,
+  For,
   Loop,
 }
 
 #[derive(Debug)]
 pub struct Token {
   token_type: TokenType,
-  line_number: u32,
-  char_number: u32,
+  offset: usize,
 }
 
-pub fn tokenize(_input: &str) -> Result<Vec<Token>, TokenError> {
-  Ok(vec!())
+/// Returns `true` when `c` is one of the decimal digits `0`-`9`.
+fn is_digit(c: &char) -> bool {
+  c.is_digit(10)
+}
+
+/// Character stream used by the tokenizer: each item pairs a character with its
+/// index in the `chars()` sequence (a character count, not a byte offset), and
+/// `Peekable` provides one item of lookahead.
+type CharIter<'a> = Peekable<Enumerate<Chars<'a>>>;
+
+/// Splits `input` into a flat list of tokens.
+///
+/// Whitespace other than `'\n'` is skipped. A `#` that is not followed by `{`
+/// starts a comment, which is dropped up to and including the next newline.
+/// Each token stores the character index of its first character. Characters
+/// that match no rule produce `TokenType::Error` tokens, so this function
+/// currently always returns `Ok`.
+pub fn tokenize(input: &str) -> Result<Vec<Token>, TokenError> {
+  use self::TokenType::*;
+
+  let mut tokens: Vec<Token> = Vec::new();
+  let mut input: CharIter = input.chars().enumerate().peekable();
+
+  while let Some((idx, c)) = input.next() {
+    let cur_tok_type = match c {
+      c if char::is_whitespace(c) && c != '\n' => continue,
+      '#' => {
+        if let Some(&(_, '{')) = input.peek() {
+          // NOTE(review): `#{` is left unconsumed here, so the `{` is tokenized
+          // on the next iteration; presumably a placeholder for block
+          // comments - confirm.
+        } else {
+          while let Some((_, c)) = input.next() {
+            if c == '\n' {
+              break;
+            }
+          }
+        }
+        continue;
+      },
+      '\n' => Newline, ';' => Semicolon,
+      ':' => Colon, ',' => Comma, '_' => Underscore, '.' => Period,
+      '(' => LParen, ')' => RParen,
+      '{' => LCurlyBrace, '}' => RCurlyBrace,
+      '<' => LAngleBracket, '>' => RAngleBracket,
+      '[' => LSquareBracket, ']' => RSquareBracket,
+      c if is_digit(&c) => handle_digit(c, &mut input),
+      // Report unrecognized characters instead of mislabeling them as `]`.
+      unknown => Error(format!("Unexpected character: {}", unknown)),
+    };
+
+    tokens.push(Token { token_type: cur_tok_type, offset: idx });
+  }
+
+  Ok(tokens)
+}
+
+/// Builds the token for a lexeme whose first character is the digit `c`.
+///
+/// A leading `0x` or `0b` yields `HexNumberSigil` or `BinNumberSigil` and
+/// consumes the `x` / `b`; otherwise `c` plus all directly following decimal
+/// digits are collected into a single `DigitGroup`.
+fn handle_digit(c: char, input: &mut CharIter) -> TokenType {
+  use self::TokenType::*;
+
+  if c == '0' && input.peek().map_or(false, |&(_, c)| { c == 'x' }) {
+    input.next();
+    HexNumberSigil
+  } else if c == '0' && input.peek().map_or(false, |&(_, c)| { c == 'b' }) {
+    input.next();
+    BinNumberSigil
+  } else {
+    let mut buf = c.to_string();
+    buf.extend(input.peeking_take_while(|&(_, ref c)| is_digit(c)).map(|(_, c)| { c }));
+    DigitGroup(Rc::new(buf))
+  }
 }
 
 /*
@@ -79,7 +154,6 @@ prototype := identifier '(' identlist ')'
 identlist := identifier (',' identifier)* | ε
 
 
-
 declaration := FN prototype LCurlyBrace (statement)* RCurlyBrace
 prototype := identifier LParen identlist RParen
 identlist := Ident (Comma Ident)* | ε