From 86ca1cd2dde2b3b2dd77a1bda6e803baa47c0b7c Mon Sep 17 00:00:00 2001 From: Garrit Franke Date: Thu, 3 Dec 2020 18:39:08 +0100 Subject: [PATCH] Parse functions --- examples/hello_world.fx | 6 +- src/lexer/mod.rs | 11 ++- src/main.rs | 8 +- src/parser/mod.rs | 175 +++++++++++++++++++++++++++++++++++----- src/parser/node_type.rs | 39 +++++++++ 5 files changed, 209 insertions(+), 30 deletions(-) create mode 100644 src/parser/node_type.rs diff --git a/examples/hello_world.fx b/examples/hello_world.fx index e13bcf3..9a48552 100644 --- a/examples/hello_world.fx +++ b/examples/hello_world.fx @@ -1,4 +1,4 @@ fn main() { - let message = "Hello World!" - print(message) -} \ No newline at end of file + + + } \ No newline at end of file diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 14477e2..5c8b774 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -6,7 +6,7 @@ use cursor::Cursor; #[cfg(test)] mod tests; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct Token { pub kind: TokenKind, pub len: usize, @@ -167,8 +167,13 @@ impl Cursor<'_> { '}' => CurlyBracesClose, c if is_id_start(c) => { let kind = self.identifier(c); - - Identifier { kind } + if kind == IdentifierKind::Unknown { + Literal { + kind: LiteralKind::Str, + } + } else { + Identifier { kind } + } } '\n' => CarriageReturn, '\t' => Tab, diff --git a/src/main.rs b/src/main.rs index 7afc0be..b714e0b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,10 +13,8 @@ fn main() -> std::io::Result<()> { let tokens = lexer::tokenize(&contents); // let ast = parser::parse(tokens.into_iter()); - for token in tokens { - if token.kind != TokenKind::Whitespace && token.kind != TokenKind::CarriageReturn { - println!("{:?}", token); - } - } + let program = parser::parse(tokens); + + println!("{:?}", program); Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3b06319..d57b7c8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,36 +1,173 @@ -use crate::lexer::Token; 
+use crate::lexer::IdentifierKind; +use crate::lexer::{LiteralKind, Token, TokenKind}; +use crate::parser::node_type::*; +use std::iter::Peekable; +use std::vec::IntoIter; + +mod node_type; pub struct Parser { - tokens: Box<dyn Iterator<Item = Token>>, - current: Option<Token>, - indentation_level: usize, + tokens: Peekable<IntoIter<Token>>, + peeked: Vec<Token>, } impl Parser { - pub(crate) fn new(tokens: impl Iterator<Item = Token> + 'static) -> Self { + pub fn new(tokens: Vec<Token>) -> Parser { Parser { - tokens: Box::new(tokens), - current: None, - indentation_level: 0, + tokens: tokens.into_iter().peekable(), + peeked: vec![], + } + } + + pub fn parse(&mut self) -> Result<Program, String> { + self.parse_program() + } + + fn next(&mut self) -> Option<Token> { + if self.peeked.is_empty() { + self.tokens.next() + } else { + self.peeked.pop() + } + } + + fn peek(&mut self) -> Option<Token> { + if let Some(token) = self.next() { + self.push(Some(token.to_owned())); + Some(token) + } else { + None + } + } + + fn drop(&mut self, count: usize) { + for _ in 0..count { + self.next(); + } + } + + fn push(&mut self, token: Option<Token>) { + if let Some(t) = token { + self.peeked.push(t); + } + } + + fn has_more(&mut self) -> bool { + !self.peeked.is_empty() || self.tokens.peek().is_some() + } + + fn next_token(&mut self) -> Token { + self.next().expect("failed to parse") + } + + fn match_token(&mut self, token_kind: TokenKind) -> Result<Token, String> { + loop { + match self.peek().expect("Failed to peek token").kind { + TokenKind::Whitespace | TokenKind::Tab | TokenKind::CarriageReturn => { + self.next_token(); + } + _ => break, + } + } + match self.next() { + Some(token) if token.kind == token_kind => Ok(token), + other => Err(format!( + "Token {:?} not found, found {:?}", + token_kind, other + )), + } + } + + fn peek_token(&mut self, token_kind: TokenKind) -> Result<Token, String> { + match self.peek() { + Some(token) if token.kind == token_kind => Ok(token), + other => Err(format!( + "Token {:?} not found, found {:?}", + token_kind, other + )), } } - fn next(&mut self) { - self.current = self.tokens.next(); 
fn match_identifier_kind(&mut self, identifier_kind: IdentifierKind) -> Result<(), String> { + let token = self.next_token(); + + match token.kind { + TokenKind::Identifier { + kind: identifier_kind, + } => Ok(()), + other => Err(format!("Expected SemiColon, found {:?}", other)), + } + } + + fn match_identifier(&mut self) -> Result<String, String> { + let token = self.next_token(); + + // TODO: Match any IdentifierKind. This can definetely be prettier, but I couldn't figure it out in a hurry + match &token.kind { + TokenKind::Identifier { + kind: IdentifierKind::Boolean, + } + | TokenKind::Identifier { + kind: IdentifierKind::Else, + } + | TokenKind::Identifier { + kind: IdentifierKind::Function, + } + | TokenKind::Identifier { + kind: IdentifierKind::If, + } + | TokenKind::Identifier { + kind: IdentifierKind::Let, + } + | TokenKind::Identifier { + kind: IdentifierKind::Unknown, + } => Ok(token.raw), + other => Err(format!("Expected Identifier, found {:?}", other)), + } } } -#[derive(Debug)] -pub struct AST; +impl Parser { + fn parse_program(&mut self) -> Result<Program, String> { + let mut functions = Vec::new(); + let globals = Vec::new(); -pub fn parse(tokens: impl Iterator<Item = Token> + 'static) -> AST { - let mut parser = Parser::new(tokens); - let ast = AST {}; + while self.has_more() { + match self.next_token() { + t if t.kind == TokenKind::Whitespace => continue, + _ => functions.push(self.parse_function().expect("Failed to parse function")), + } + } + + Ok(Program { + func: functions, + globals: globals, + }) + } - loop { - parser.next(); - break; + fn parse_function(&mut self) -> Result<Function, String> { + self.match_identifier_kind(IdentifierKind::Function); + let name = self + .match_token(TokenKind::Literal { + kind: LiteralKind::Str, + })? 
+ .raw; + + self.match_token(TokenKind::BraceOpen); + self.match_token(TokenKind::BraceClose); + self.match_token(TokenKind::CurlyBracesOpen); + self.match_token(TokenKind::CurlyBracesClose); + + Ok(Function { + name: name, + arguments: Vec::new(), + statements: Vec::new(), + }) } +} + +pub fn parse(tokens: Vec<Token>) -> Result<Program, String> { + let mut parser = Parser::new(tokens); - ast + parser.parse() } diff --git a/src/parser/node_type.rs b/src/parser/node_type.rs new file mode 100644 index 0000000..6ff33b1 --- /dev/null +++ b/src/parser/node_type.rs @@ -0,0 +1,39 @@ +#[derive(Debug)] +pub struct Program { + pub func: Vec<Function>, + pub globals: Vec<String>, +} + +#[derive(Debug)] +pub struct Function { + pub name: String, + pub arguments: Vec<Variable>, + pub statements: Vec<Statement>, +} + +#[derive(Debug, Eq, PartialEq)] +pub struct Variable { + pub name: String, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Statement { + Declare(Variable, Option<Expression>), + Return(Expression), + If(Expression, Box<Statement>, Option<Box<Statement>>), + While(Expression, Box<Statement>), + Exp(Expression), + Compound(Vec<Statement>), +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Expression { + Int(u32), + Char(u8), + FunctionCall(String, Vec<Expression>), + Variable(String), + VariableRef(String), + Assign(String, Box<Expression>), + AssignPostfix(String, Box<Expression>), + Ternary(Box<Expression>, Box<Expression>, Box<Expression>), +}