Browse Source

Refactor TokenType

github-actions
Garrit Franke 3 years ago
parent
commit
de7163c40e
  1. 8
      examples/math.fx
  2. 67
      src/lexer/mod.rs
  3. 8
      src/lexer/tests.rs
  4. 1
      src/main.rs
  5. 44
      src/parser/mod.rs

8
examples/math.fx

@ -1,12 +1,12 @@
fn main() { fn main() {
let num = 10 let num = 10;
print(fib(num)) print(fib(num));
} }
fn fib(n int) int { fn fib(n int) int {
if (n <= 1) { if (n <= 1) {
return n return n;
} }
return fib(n-1) + fib(n-2) return fib(n-1) + fib(n-2);
} }

67
src/lexer/mod.rs

@ -20,17 +20,14 @@ impl Token {
} }
/// Enum representing common lexeme types. /// Enum representing common lexeme types.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TokenKind { pub enum TokenKind {
/// Any whitespace characters sequence. /// Any whitespace characters sequence.
Whitespace, Whitespace,
Literal { Identifier(String),
kind: LiteralKind, Literal(Value),
},
/// Keywords such as 'if' or 'else' /// Keywords such as 'if' or 'else'
Identifier { Keyword(Keyword),
kind: IdentifierKind,
},
/// // Lorem Ipsum /// // Lorem Ipsum
Comment, Comment,
/// "+" /// "+"
@ -43,6 +40,8 @@ pub enum TokenKind {
Slash, Slash,
/// ":" /// ":"
Colon, Colon,
/// ";"
SemiColon,
/// "=" /// "="
Equals, Equals,
/// "==" /// "=="
@ -72,13 +71,13 @@ pub enum TokenKind {
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind { pub enum Value {
Int, Int,
Str, Str,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum IdentifierKind { pub enum Keyword {
Let, Let,
If, If,
Else, Else,
@ -132,19 +131,13 @@ impl Cursor<'_> {
fn advance_token(&mut self) -> Token { fn advance_token(&mut self) -> Token {
// Original chars used to identify the token later on // Original chars used to identify the token later on
let original_chars = self.chars(); let original_chars = self.chars();
// FIXME: Identical value, since it will be used twice and is not clonable later
let original_chars2 = self.chars();
let first_char = self.bump().unwrap(); let first_char = self.bump().unwrap();
let token_kind = match first_char { let token_kind = match first_char {
c if is_whitespace(c) => self.whitespace(), c if is_whitespace(c) => self.whitespace(),
'0'..='9' => { '0'..='9' => self.number(),
let kind = self.number(); '"' | '\'' => self.string(),
TokenKind::Literal { kind }
}
'"' | '\'' => {
let kind = self.string();
TokenKind::Literal { kind }
}
'+' => Plus, '+' => Plus,
'-' => Minus, '-' => Minus,
'*' => Star, '*' => Star,
@ -157,6 +150,7 @@ impl Cursor<'_> {
_ => Equals, _ => Equals,
}, },
':' => Colon, ':' => Colon,
';' => SemiColon,
'<' => SmallerThen, '<' => SmallerThen,
'>' => LargerThen, '>' => LargerThen,
'(' => BraceOpen, '(' => BraceOpen,
@ -167,12 +161,13 @@ impl Cursor<'_> {
'}' => CurlyBracesClose, '}' => CurlyBracesClose,
c if is_id_start(c) => { c if is_id_start(c) => {
let kind = self.identifier(c); let kind = self.identifier(c);
if kind == IdentifierKind::Unknown { println!("Identifier Type: {:?}", kind);
Literal { if kind == Keyword::Unknown {
kind: LiteralKind::Str, let mut ch: String = original_chars.collect();
} ch.truncate(self.len_consumed());
TokenKind::Identifier(ch)
} else { } else {
Identifier { kind } TokenKind::Keyword(kind)
} }
} }
'\n' => CarriageReturn, '\n' => CarriageReturn,
@ -181,7 +176,7 @@ impl Cursor<'_> {
}; };
let len = self.len_consumed(); let len = self.len_consumed();
let mut raw = original_chars.collect::<String>(); let mut raw = original_chars2.collect::<String>();
// Cut the original tokens to the length of the token // Cut the original tokens to the length of the token
raw.truncate(len); raw.truncate(len);
Token::new(token_kind, len, raw) Token::new(token_kind, len, raw)
@ -208,18 +203,18 @@ impl Cursor<'_> {
Whitespace Whitespace
} }
fn number(&mut self) -> LiteralKind { fn number(&mut self) -> TokenKind {
self.eat_digits(); self.eat_digits();
LiteralKind::Int TokenKind::Literal(Value::Int)
} }
fn string(&mut self) -> LiteralKind { fn string(&mut self) -> TokenKind {
self.eat_string(); self.eat_string();
LiteralKind::Str TokenKind::Literal(Value::Str)
} }
fn identifier(&mut self, first_char: char) -> IdentifierKind { fn identifier(&mut self, first_char: char) -> Keyword {
let mut original: String = self.chars().collect::<String>(); let mut original: String = self.chars().collect::<String>();
let len = self.eat_while(is_id_continue); let len = self.eat_while(is_id_continue);
@ -229,12 +224,12 @@ impl Cursor<'_> {
original = format!("{}{}", first_char, original); original = format!("{}{}", first_char, original);
match original { match original {
c if c == "if" => IdentifierKind::If, c if c == "if" => Keyword::If,
c if c == "else" => IdentifierKind::Else, c if c == "else" => Keyword::Else,
c if c == "fn" => IdentifierKind::Function, c if c == "fn" => Keyword::Function,
c if c == "true" || c == "false" => IdentifierKind::Boolean, c if c == "true" || c == "false" => Keyword::Boolean,
c if c == "let" => IdentifierKind::Let, c if c == "let" => Keyword::Let,
_ => IdentifierKind::Unknown, _ => Keyword::Unknown,
} }
} }

8
src/lexer/tests.rs

@ -101,7 +101,7 @@ mod tests {
Token { Token {
len: 4, len: 4,
kind: TokenKind::Identifier { kind: TokenKind::Identifier {
kind: IdentifierKind::Boolean kind: Keyword::Boolean
}, },
raw: "true".to_owned() raw: "true".to_owned()
} }
@ -112,7 +112,7 @@ mod tests {
Token { Token {
len: 5, len: 5,
kind: TokenKind::Identifier { kind: TokenKind::Identifier {
kind: IdentifierKind::Boolean kind: Keyword::Boolean
}, },
raw: "false".to_owned() raw: "false".to_owned()
} }
@ -128,7 +128,7 @@ mod tests {
Token { Token {
len: 2, len: 2,
kind: TokenKind::Identifier { kind: TokenKind::Identifier {
kind: IdentifierKind::Function kind: Keyword::Function
}, },
raw: "fn".to_owned() raw: "fn".to_owned()
} }
@ -164,7 +164,7 @@ mod tests {
Token { Token {
len: 2, len: 2,
kind: TokenKind::Identifier { kind: TokenKind::Identifier {
kind: IdentifierKind::Function kind: Keyword::Function
}, },
raw: "fn".to_owned(), raw: "fn".to_owned(),
} }

1
src/main.rs

@ -1,4 +1,3 @@
use lexer::TokenKind;
use std::fs::File; use std::fs::File;
use std::io::Read; use std::io::Read;

44
src/parser/mod.rs

@ -1,5 +1,5 @@
use crate::lexer::IdentifierKind; use crate::lexer::Keyword;
use crate::lexer::{LiteralKind, Token, TokenKind}; use crate::lexer::{Token, TokenKind};
use crate::parser::node_type::*; use crate::parser::node_type::*;
use std::iter::Peekable; use std::iter::Peekable;
use std::vec::IntoIter; use std::vec::IntoIter;
@ -85,44 +85,22 @@ impl Parser {
} }
} }
fn match_identifier_kind(&mut self, identifier_kind: IdentifierKind) -> Result<(), String> { fn match_keyword(&mut self, keyword: Keyword) -> Result<(), String> {
let token = self.next_token(); let token = self.next_token();
println!( println!(
"match_identifier_kind: Token: {:?}, identifier_kind: {:?}", "match_identifier_kind: Token: {:?}, identifier_kind: {:?}",
token, identifier_kind token, keyword
); );
match token.kind { match token.kind {
TokenKind::Identifier { TokenKind::Keyword(_) => Ok(()),
kind: identifier_kind,
} => Ok(()),
other => Err(format!("Expected SemiColon, found {:?}", other)), other => Err(format!("Expected SemiColon, found {:?}", other)),
} }
} }
fn match_identifier(&mut self) -> Result<String, String> { fn match_identifier(&mut self) -> Result<String, String> {
let token = self.next_token(); match self.next_token().kind {
TokenKind::Identifier(n) => Ok(n),
// TODO: Match any IdentifierKind. This can definitely be prettier, but I couldn't figure it out in a hurry
match &token.kind {
TokenKind::Identifier {
kind: IdentifierKind::Boolean,
}
| TokenKind::Identifier {
kind: IdentifierKind::Else,
}
| TokenKind::Identifier {
kind: IdentifierKind::Function,
}
| TokenKind::Identifier {
kind: IdentifierKind::If,
}
| TokenKind::Identifier {
kind: IdentifierKind::Let,
}
| TokenKind::Identifier {
kind: IdentifierKind::Unknown,
} => Ok(token.raw),
other => Err(format!("Expected Identifier, found {:?}", other)), other => Err(format!("Expected Identifier, found {:?}", other)),
} }
} }
@ -144,12 +122,8 @@ impl Parser {
} }
fn parse_function(&mut self) -> Result<Function, String> { fn parse_function(&mut self) -> Result<Function, String> {
self.match_identifier_kind(IdentifierKind::Function)?; self.match_keyword(Keyword::Function)?;
let name = self let name = self.match_identifier()?;
.match_token(TokenKind::Literal {
kind: LiteralKind::Str,
})?
.raw;
self.match_token(TokenKind::BraceOpen)?; self.match_token(TokenKind::BraceOpen)?;
self.match_token(TokenKind::BraceClose)?; self.match_token(TokenKind::BraceClose)?;

Loading…
Cancel
Save