Run rustfmt on tokenizer code

2021-10-29 19:03:42 -07:00 · 2021-10-29 19:03:42 -07:00 · 8111f69640
commit 8111f69640
parent 304df5c50e
1 changed file with 367 additions and 288 deletions
--- a/schala-lang/language/src/tokenizing.rs
+++ b/schala-lang/language/src/tokenizing.rs
@ -1,8 +1,13 @@
 #![allow(clippy::upper_case_acronyms)]
 use std::{
    convert::{TryFrom, TryInto},
    fmt,
    iter::{Iterator, Peekable},
    rc::Rc,
 };
 use itertools::Itertools;
 use std::{iter::{Iterator, Peekable}, convert::TryFrom, rc::Rc, fmt};
 use std::convert::TryInto;
 /// A location in a particular source file. Note that the
 /// sizes of the internal unsigned integer types limit
@ -22,25 +27,33 @@ impl fmt::Display for Location {
 #[derive(Debug, PartialEq, Clone)]
 pub enum TokenKind {
-  Newline, Semicolon,
+    Newline,
    Semicolon,
-  LParen, RParen,
+    LParen,
-  LSquareBracket, RSquareBracket,
+    RParen,
-  LAngleBracket, RAngleBracket,
+    LSquareBracket,
-  LCurlyBrace, RCurlyBrace,
+    RSquareBracket,
-  Pipe, Backslash,
+    LAngleBracket,
    RAngleBracket,
    LCurlyBrace,
    RCurlyBrace,
    Pipe,
    Backslash,
    AtSign,
-
+    Comma,
-  Comma, Period, Colon, Underscore,
+    Period,
-  Slash, Equals,
+    Colon,
    Underscore,
    Slash,
    Equals,
    Operator(Rc<String>),
-  DigitGroup(Rc<String>), HexLiteral(Rc<String>), BinNumberSigil,
+    DigitGroup(Rc<String>),
-  StrLiteral {
+    HexLiteral(Rc<String>),
-    s: Rc<String>,
+    BinNumberSigil,
-    prefix: Option<Rc<String>>
+    StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
  },
    Identifier(Rc<String>),
    Keyword(Kw),
@ -56,7 +69,7 @@ impl fmt::Display for TokenKind {
            &Operator(ref s) => write!(f, "Operator({})", **s),
            &DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
            &HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
-      &StrLiteral {ref s, .. } => write!(f, "StrLiteral({})", s),
+            &StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s),
            &Identifier(ref s) => write!(f, "Identifier({})", s),
            &Error(ref s) => write!(f, "Error({})", s),
            other => write!(f, "{:?}", other),
@ -66,17 +79,28 @@ impl fmt::Display for TokenKind {
 /// Keyword kinds. A value of this enum is the payload of `TokenKind::Keyword`,
 /// and the `TryFrom<&str>` impl that follows this enum converts from string slices.
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum Kw {
-  If, Then, Else,
+    If,
    Then,
    Else,
    Is,
    Func,
-  For, While,
+    For,
-  Const, Let, In,
+    While,
    Const,
    Let,
    In,
    Mut,
    Return,
-  Alias, Type, SelfType, SelfIdent,
+    Alias,
-  Interface, Impl,
+    Type,
-  True, False,
+    SelfType,
-  Module, Import
+    SelfIdent,
    Interface,
    Impl,
    True,
    False,
    Module,
    Import,
 }
 impl TryFrom<&str> for Kw {
@ -127,7 +151,8 @@ impl Token {
    }
 }
-const OPERATOR_CHARS: [char; 17] = ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
+const OPERATOR_CHARS: [char; 17] =
    ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
 /// Reports whether `c` is one of the characters listed in `OPERATOR_CHARS`.
 ///
 /// Returns `true` on a match and `false` for every other character.
 fn is_operator(c: &char) -> bool {
    OPERATOR_CHARS.contains(c)
 }
@ -138,9 +163,7 @@ pub fn tokenize(input: &str) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
-      .flat_map(|(line_idx, line)| {
+        .flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch)))
          line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch))
      })
        .peekable();
    while let Some((line_num, char_num, c)) = input.next() {
@ -153,7 +176,7 @@ pub fn tokenize(input: &str) -> Vec<Token> {
                        }
                    }
                    continue;
-        },
+                }
                Some('*') => {
                    input.next();
                    let mut comment_level = 1;
@ -174,15 +197,20 @@ pub fn tokenize(input: &str) -> Vec<Token> {
                    } else {
                        continue;
                    }
-        },
+                }
-        _ => Slash
+                _ => Slash,
            },
            c if c.is_whitespace() && c != '\n' => continue,
-      '\n' => Newline, ';' => Semicolon,
+            '\n' => Newline,
-      ':' => Colon, ',' => Comma,
+            ';' => Semicolon,
-      '(' => LParen, ')' => RParen,
+            ':' => Colon,
-      '{' => LCurlyBrace, '}' => RCurlyBrace,
+            ',' => Comma,
-      '[' => LSquareBracket, ']' => RSquareBracket,
+            '(' => LParen,
            ')' => RParen,
            '{' => LCurlyBrace,
            '}' => RCurlyBrace,
            '[' => LSquareBracket,
            ']' => RSquareBracket,
            '"' => handle_quote(&mut input, None),
            '\\' => Backslash,
            '@' => AtSign,
@ -191,36 +219,43 @@ pub fn tokenize(input: &str) -> Vec<Token> {
            c if is_operator(&c) => handle_operator(c, &mut input),
            unknown => Error(format!("Unexpected character: {}", unknown)),
        };
-    let location = Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
+        let location =
            Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
        tokens.push(Token { kind: cur_tok_kind, location });
    }
    tokens
 }
-fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind {
+fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
    // `c` is the already-read leading character of the token; `input` yields what follows it.
    // Peek (without consuming) at the next character to detect a `0x` or `0b` prefix.
    let next_ch = input.peek().map(|&(_, _, c)| c);
    if c == '0' && next_ch == Some('x') {
        // Consume the `x`, then gather the following run of hex digits and `_` characters.
        // The `0x` prefix itself is not part of the stored string.
        input.next();
-        let rest: String = input.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_').map(|(_, _, c)| { c }).collect();
+        let rest: String = input
            .peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
            .map(|(_, _, c)| c)
            .collect();
        HexLiteral(Rc::new(rest))
    } else if c == '0' && next_ch == Some('b') {
        // Consume the `b` and return only the sigil; no digits after it are read here.
        input.next();
        BinNumberSigil
    } else {
        // Plain number: `c` followed by the decimal digits that come next.
        let mut buf = c.to_string();
-        buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| { c }));
+        buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
        DigitGroup(Rc::new(buf))
    }
 }
-fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix: Option<&str>) -> TokenKind {
+fn handle_quote(
    input: &mut Peekable<impl Iterator<Item = CharData>>,
    quote_prefix: Option<&str>,
 ) -> TokenKind {
    let mut buf = String::new();
    loop {
-    match input.next().map(|(_, _, c)| { c }) {
+        match input.next().map(|(_, _, c)| c) {
            Some('"') => break,
            Some('\\') => {
-        let next = input.peek().map(|&(_, _, c)| { c });
+                let next = input.peek().map(|&(_, _, c)| c);
                if next == Some('n') {
                    input.next();
                    buf.push('\n')
@ -231,7 +266,7 @@ fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix
                    input.next();
                    buf.push('\t');
                }
-      },
+            }
            Some(c) => buf.push(c),
            None => return TokenKind::Error("Unclosed string".to_string()),
        }
@ -239,24 +274,24 @@ fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix
    TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
 }
-fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind {
+fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
    let mut buf = String::new();
    buf.push(c);
    let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
    if c == '_' && next_is_alphabetic {
-    return TokenKind::Underscore
+        return TokenKind::Underscore;
    }
    loop {
-    match input.peek().map(|&(_, _, c)| { c }) {
+        match input.peek().map(|&(_, _, c)| c) {
            Some(c) if c == '"' => {
                input.next();
                return handle_quote(input, Some(&buf));
-      },
+            }
            Some(c) if c.is_alphanumeric() || c == '_' => {
                input.next();
                buf.push(c);
-      },
+            }
            _ => break,
        }
    }
@ -267,11 +302,11 @@ fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>
    }
 }
-fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind {
+fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
    match c {
        '<' | '>' | '|' | '.' | '=' => {
-      let next = &input.peek().map(|&(_, _, c)| { c });
+            let next = &input.peek().map(|&(_, _, c)| c);
-      let next_is_op = next.map(|n| { is_operator(&n) }).unwrap_or(false);
+            let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
            if !next_is_op {
                return match c {
                    '<' => LAngleBracket,
@ -280,9 +315,9 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
                    '.' => Period,
                    '=' => Equals,
                    _ => unreachable!(),
                };
            }
        }
    },
        _ => (),
    };
@ -290,27 +325,27 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
    if c == '`' {
        loop {
-      match input.peek().map(|&(_, _, c)| { c }) {
+            match input.peek().map(|&(_, _, c)| c) {
                Some(c) if c.is_alphabetic() || c == '_' => {
                    input.next();
                    buf.push(c);
-        },
+                }
                Some('`') => {
                    input.next();
                    break;
-        },
+                }
-        _ => break
+                _ => break,
            }
        }
    } else {
        buf.push(c);
        loop {
-      match input.peek().map(|&(_, _, c)| { c }) {
+            match input.peek().map(|&(_, _, c)| c) {
                Some(c) if is_operator(&c) => {
                    input.next();
                    buf.push(c);
-        },
+                }
-        _ => break
+                _ => break,
            }
        }
    }
@ -319,12 +354,23 @@ fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>)
 #[cfg(test)]
 mod schala_tokenizer_tests {
-  use super::*;
+    use super::{Kw::*, *};
  use super::Kw::*;
-  macro_rules! digit { ($ident:expr) => { DigitGroup(Rc::new($ident.to_string())) } }
+    macro_rules! digit {
-  macro_rules! ident { ($ident:expr) => { Identifier(Rc::new($ident.to_string())) } }
+        ($ident:expr) => {
-  macro_rules! op { ($ident:expr) => { Operator(Rc::new($ident.to_string())) } }
+            DigitGroup(Rc::new($ident.to_string()))
        };
    }
    // Test shorthand: expands to an `Identifier` wrapping the stringified argument in an `Rc`.
    macro_rules! ident {
        ($name:expr) => {{
            let text = $name.to_string();
            Identifier(Rc::new(text))
        }};
    }
    // Test shorthand: expands to an `Operator` wrapping the stringified argument in an `Rc`.
    macro_rules! op {
        ($symbol:expr) => {{
            let text = $symbol.to_string();
            Operator(Rc::new(text))
        }};
    }
    fn token_kinds(input: &str) -> Vec<TokenKind> {
        tokenize(input).into_iter().map(move |tok| tok.kind).collect()
@ -333,8 +379,22 @@ mod schala_tokenizer_tests {
    #[test]
    fn tokens() {
        // Expectation pinned here: `let` comes out as `Keyword(Let)`, the `<` / `>` around `B`
        // as angle-bracket tokens, the lone `=` as `Equals`, and `++` as one `Operator`.
        let output = token_kinds("let a: A<B> = c ++ d");
-    assert_eq!(output, vec![Keyword(Let), ident!("a"), Colon, ident!("A"),
+        assert_eq!(
-      LAngleBracket, ident!("B"), RAngleBracket, Equals, ident!("c"), op!("++"), ident!("d")]);
+            output,
            vec![
                Keyword(Let),
                ident!("a"),
                Colon,
                ident!("A"),
                LAngleBracket,
                ident!("B"),
                RAngleBracket,
                Equals,
                ident!("c"),
                op!("++"),
                ident!("d")
            ]
        );
    }
    #[test]
@ -356,7 +416,17 @@ mod schala_tokenizer_tests {
        //TODO not sure if I want this behavior
        let output = token_kinds("1  + /* hella */ bro */ 2");
-    assert_eq!(output, vec![digit!("1"), op!("+"), Identifier(Rc::new("bro".to_string())), Operator(Rc::new("*".to_string())), Slash, DigitGroup(Rc::new("2".to_string()))]);
+        assert_eq!(
            output,
            vec![
                digit!("1"),
                op!("+"),
                Identifier(Rc::new("bro".to_string())),
                Operator(Rc::new("*".to_string())),
                Slash,
                DigitGroup(Rc::new("2".to_string()))
            ]
        );
    }
    #[test]
@ -371,9 +441,18 @@ mod schala_tokenizer_tests {
        assert_eq!(output, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]);
        let output = token_kinds(r#"b"some bytestring""#);
-    assert_eq!(output, vec![StrLiteral { s: Rc::new("some bytestring".to_string()), prefix: Some(Rc::new("b".to_string())) }]);
+        assert_eq!(
            output,
            vec![StrLiteral {
                s: Rc::new("some bytestring".to_string()),
                prefix: Some(Rc::new("b".to_string()))
            }]
        );
        let output = token_kinds(r#""Do \n \" escapes work\t""#);
-    assert_eq!(output, vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]);
+        assert_eq!(
            output,
            vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]
        );
    }
 }