diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 787b855..d8e1ff4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13,401 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -use crate::lexer::Keyword; -use crate::lexer::{Token, TokenKind, Value}; -use crate::parser::node_type::Statement; -use crate::parser::node_type::*; -use crate::util::string_util::highlight_position_in_file; -use std::convert::TryFrom; -use std::iter::Peekable; -use std::vec::IntoIter; - pub mod node_type; - +pub mod parser; +mod rules; +use crate::lexer::Token; +use node_type::Program; #[cfg(test)] mod tests; -pub struct Parser { - tokens: Peekable>, - peeked: Vec, - current: Option, - prev: Option, - raw: Option, -} - -impl Parser { - pub fn new(tokens: Vec, raw: Option) -> Parser { - let tokens_without_whitespace: Vec = tokens - .into_iter() - .filter(|token| token.kind != TokenKind::Whitespace && token.kind != TokenKind::Comment) - .collect(); - Parser { - tokens: tokens_without_whitespace.into_iter().peekable(), - peeked: vec![], - current: None, - prev: None, - raw: raw, - } - } - - pub fn parse(&mut self) -> Result { - self.parse_program() - } - - fn next(&mut self) -> Result { - self.prev = self.current.to_owned(); - let item = if self.peeked.is_empty() { - self.tokens.next() - } else { - self.peeked.pop() - }; - - self.current = item.to_owned(); - item.ok_or_else(|| "Expected token".into()) - } - - fn peek(&mut self) -> Result { - let token = self.next()?; - self.push(token.to_owned()); - Ok(token) - } - - fn drop(&mut self, count: usize) { - for _ in 0..count { - let _ = self.next(); - } - } - - fn push(&mut self, token: Token) { - self.peeked.push(token); - } - - fn has_more(&mut self) -> bool { - !self.peeked.is_empty() || self.tokens.peek().is_some() - } - - fn match_token(&mut self, token_kind: TokenKind) -> Result { - match self.next()? { - token if token.kind == token_kind => Ok(token), - other => Err(self.make_error(token_kind, other)), - } - } - - fn peek_token(&mut self, token_kind: TokenKind) -> Result { - match self.peek()? { - token if token.kind == token_kind => Ok(token), - other => Err(format!( - "Token {:?} not found, found {:?}", - token_kind, other - )), - } - } - fn match_keyword(&mut self, keyword: Keyword) -> Result<(), String> { - let token = self.next()?; - match &token.kind { - TokenKind::Keyword(ref k) if k == &keyword => Ok(()), - _ => Err(self.make_error(TokenKind::SemiColon, token)), - } - } - fn match_operator(&mut self) -> Result { - BinOp::try_from(self.next()?.kind) - } - fn match_identifier(&mut self) -> Result { - match self.next()?.kind { - TokenKind::Identifier(n) => Ok(n), - other => Err(format!("Expected Identifier, found {:?}", other)), - } - } - - fn make_error(&mut self, token_kind: TokenKind, other: Token) -> String { - match &self.raw { - Some(raw_file) => format!( - "Token {:?} not found, found {:?}\n{:?}", - token_kind, - other, - highlight_position_in_file(raw_file.to_string(), other.to_owned().pos) - ), - None => format!("Token {:?} not found, found {:?}", token_kind, other), - } - } - - fn prev(&mut self) -> Option { - self.prev.clone() - } -} - -impl Parser { - fn parse_program(&mut self) -> Result { - let mut functions = Vec::new(); - let globals = Vec::new(); - - while self.has_more() { - functions.push(self.parse_function()?) - } - - Ok(Program { - func: functions, - globals: globals, - }) - } - - fn parse_block(&mut self) -> Result { - self.match_token(TokenKind::CurlyBracesOpen)?; - - let mut statements = vec![]; - - while let Err(_) = self.peek_token(TokenKind::CurlyBracesClose) { - let statement = self.parse_statement()?; - dbg!("{:?}", &statement); - statements.push(statement); - } - - self.match_token(TokenKind::CurlyBracesClose)?; - - Ok(Statement::Block(statements)) - } - - fn parse_function(&mut self) -> Result { - self.match_keyword(Keyword::Function)?; - let name = self.match_identifier()?; - - self.match_token(TokenKind::BraceOpen)?; - - let arguments: Vec = match self.peek()? { - t if t.kind == TokenKind::BraceClose => Vec::new(), - _ => self - .parse_arguments() - .expect("Failed to parse function arguments"), - }; - - self.match_token(TokenKind::BraceClose)?; - - let body = self.parse_block()?; - - Ok(Function { - name: name, - arguments: arguments, - body: body, - }) - } - - fn parse_arguments(&mut self) -> Result, String> { - let mut args = Vec::new(); - while let Err(_) = self.peek_token(TokenKind::BraceClose) { - let next = self.next()?; - match next.kind { - TokenKind::Identifier(name) => args.push(Variable { name: name }), - _ => return Err(self.make_error(TokenKind::Identifier("Argument".into()), next)), - } - } - - Ok(args) - } - - fn parse_statement(&mut self) -> Result { - let token = self.peek()?; - let state = match &token.kind { - TokenKind::Keyword(Keyword::Let) => self.parse_declare(), - TokenKind::Keyword(Keyword::Return) => self.parse_return(), - TokenKind::Keyword(Keyword::If) => self.parse_conditional_statement(), - TokenKind::Identifier(_) => { - let ident = self.match_identifier()?; - if let Ok(_) = self.peek_token(TokenKind::BraceOpen) { - let state = self.parse_function_call(Some(ident))?; - Ok(Statement::Exp(state)) - } else { - let state = Statement::Exp(Expression::Variable(ident.into())); - Ok(state) - } - } - TokenKind::Literal(_) => Ok(Statement::Exp(self.parse_expression()?)), - _ => return Err(self.make_error(TokenKind::Unknown, token)), - }; - state - } - - /// Parses a function call from tokens. - /// The name of the function needs to be passed here, because we have already passed it with our cursor. - /// If no function name is provided, the next token will be fetched - fn parse_function_call(&mut self, func_name: Option) -> Result { - let name = match func_name { - Some(name) => name, - None => self.next()?.raw, - }; - - self.match_token(TokenKind::BraceOpen)?; - - let mut args = Vec::new(); - - loop { - let next = self.peek()?; - match &next.kind { - TokenKind::BraceClose => break, - TokenKind::Comma => { - self.next(); - continue; - } - TokenKind::Identifier(_) | TokenKind::Literal(_) => { - args.push(self.parse_expression()?) - } - _ => { - return Err(self.make_error(TokenKind::BraceClose, next)); - } - }; - } - - self.match_token(TokenKind::BraceClose)?; - Ok(Expression::FunctionCall(name, args)) - } - - fn parse_return(&mut self) -> Result { - self.match_keyword(Keyword::Return)?; - let peeked = self.peek()?; - match peeked.kind { - TokenKind::SemiColon => Ok(Statement::Return(None)), - _ => Ok(Statement::Return(Some(self.parse_expression()?))), - } - } - - fn parse_expression(&mut self) -> Result { - let token = self.next()?; - match token.kind { - TokenKind::Literal(Value::Int) => { - let state = match BinOp::try_from(self.peek()?.kind) { - Ok(_) => self.parse_bin_op(None)?, - Err(_) => Expression::Int(token.raw.parse::().map_err(|e| e.to_string())?), - }; - Ok(state) - } - TokenKind::Literal(Value::Str) => { - let state = match BinOp::try_from(self.peek()?.kind) { - Ok(_) => self.parse_bin_op(None)?, - Err(_) => Expression::Str(token.raw), - }; - Ok(state) - } - TokenKind::Identifier(val) => { - let next = self.peek()?; - let state = match &next.kind { - TokenKind::BraceOpen => { - let func_call = self.parse_function_call(Some(val))?; - match BinOp::try_from(self.peek()?.kind) { - Ok(_) => self.parse_bin_op(Some(func_call))?, - Err(_) => func_call, - } - } - _ => match BinOp::try_from(self.peek()?.kind) { - Ok(_) => self.parse_bin_op(Some(Expression::Variable(token.raw)))?, - Err(_) => Expression::Variable(val), - }, - }; - Ok(state) - } - TokenKind::SquareBraceOpen => self.parse_array(), - other => Err(format!("Expected Expression, found {:?}", other)), - } - } - - fn parse_array(&mut self) -> Result { - let mut elements = Vec::new(); - loop { - let next = self.next()?; - match next.kind { - TokenKind::Literal(Value::Int) => { - let value = next.raw.parse::().map_err(|e| e.to_string())?; - elements.push(Expression::Int(value)); - } - _ => return Err(self.make_error(TokenKind::Identifier("Argument".into()), next)), - }; - if self.peek_token(TokenKind::SquareBraceClose).is_ok() { - break; - } - self.match_token(TokenKind::Comma)?; - } - - self.match_token(TokenKind::SquareBraceClose)?; - - Ok(Expression::Array(elements)) - } - - fn parse_conditional_statement(&mut self) -> Result { - self.match_keyword(Keyword::If)?; - let condition = self.parse_expression()?; - - let body = self.parse_block()?; - - match self.peek()? { - tok if tok.kind == TokenKind::Keyword(Keyword::Else) => { - self.next(); - - let peeked = self.peek()?; - - let has_else = match &peeked.kind { - TokenKind::CurlyBracesOpen => Some(self.parse_block()?), - _ => None, - }; - - let else_branch = match has_else { - Some(branch) => branch, - None => self.parse_conditional_statement()?, - }; - Ok(Statement::If( - condition, - Box::new(body), - Some(Box::new(else_branch)), - )) - } - _ => Ok(Statement::If(condition, Box::new(body), None)), - } - } - - /// In some occurences a complex expression has been evaluated before a binary operation is encountered. - /// The following expression is one such example: - /// ``` - /// foo(1) * 2 - /// ``` - /// In this case, the function call has already been evaluated, and needs to be passed to this function. - fn parse_bin_op(&mut self, lhs: Option) -> Result { - let left = match lhs { - Some(lhs) => lhs, - None => { - let prev = self.prev().ok_or_else(|| "Expected Token")?; - match &prev.kind { - TokenKind::Identifier(_) | TokenKind::Literal(_) => { - Ok(Expression::try_from(prev)?) - } - _ => Err(self.make_error(TokenKind::Unknown, prev)), - }? - } - }; - - let op = self.match_operator()?; - - Ok(Expression::BinOp( - Box::from(Expression::try_from(left).map_err(|e| e.to_string())?), - op, - Box::from(self.parse_expression()?), - )) - } - - fn parse_declare(&mut self) -> Result { - self.match_keyword(Keyword::Let)?; - match (self.next()?.kind, self.peek()?.kind) { - (TokenKind::Identifier(name), TokenKind::SemiColon) => { - Ok(Statement::Declare(Variable { name }, None)) - } - (TokenKind::Identifier(name), TokenKind::Assign) => { - self.drop(1); - let exp = self.parse_expression().ok(); - Ok(Statement::Declare(Variable { name }, exp)) - } - other => Err(format!("Expected identifier, found {:?}", other)), - } - } -} - -pub fn parse(tokens: Vec, raw: Option) -> Result { - let mut parser = Parser::new(tokens, raw); - +pub fn parse(tokens: Vec, raw: Option) -> Result { + let mut parser = parser::Parser::new(tokens, raw); parser.parse() } diff --git a/src/parser/node_type.rs b/src/parser/node_type.rs index a926a54..5abda20 100644 --- a/src/parser/node_type.rs +++ b/src/parser/node_type.rs @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -use crate::parser::{Token, TokenKind, Value}; +use crate::lexer::*; use core::convert::TryFrom; #[derive(Debug)] diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..c92b412 --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,133 @@ +/** + * Copyright 2020 Garrit Franke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +use crate::lexer::Keyword; +use crate::lexer::{Token, TokenKind}; +use crate::parser::node_type::*; +use crate::util::string_util::highlight_position_in_file; +use std::convert::TryFrom; +use std::iter::Peekable; +use std::vec::IntoIter; + +pub struct Parser { + tokens: Peekable>, + peeked: Vec, + current: Option, + prev: Option, + raw: Option, +} + +impl Parser { + pub fn new(tokens: Vec, raw: Option) -> Parser { + let tokens_without_whitespace: Vec = tokens + .into_iter() + .filter(|token| token.kind != TokenKind::Whitespace && token.kind != TokenKind::Comment) + .collect(); + Parser { + tokens: tokens_without_whitespace.into_iter().peekable(), + peeked: vec![], + current: None, + prev: None, + raw: raw, + } + } + + pub fn parse(&mut self) -> Result { + self.parse_program() + } + + pub(super) fn next(&mut self) -> Result { + self.prev = self.current.to_owned(); + let item = if self.peeked.is_empty() { + self.tokens.next() + } else { + self.peeked.pop() + }; + + self.current = item.to_owned(); + item.ok_or_else(|| "Expected token".into()) + } + + pub(super) fn peek(&mut self) -> Result { + let token = self.next()?; + self.push(token.to_owned()); + Ok(token) + } + + pub(super) fn drop(&mut self, count: usize) { + for _ in 0..count { + let _ = self.next(); + } + } + + pub(super) fn push(&mut self, token: Token) { + self.peeked.push(token); + } + + pub(super) fn has_more(&mut self) -> bool { + !self.peeked.is_empty() || self.tokens.peek().is_some() + } + + pub(super) fn match_token(&mut self, token_kind: TokenKind) -> Result { + match self.next()? { + token if token.kind == token_kind => Ok(token), + other => Err(self.make_error(token_kind, other)), + } + } + + pub(super) fn peek_token(&mut self, token_kind: TokenKind) -> Result { + match self.peek()? { + token if token.kind == token_kind => Ok(token), + other => Err(format!( + "Token {:?} not found, found {:?}", + token_kind, other + )), + } + } + + pub(super) fn match_keyword(&mut self, keyword: Keyword) -> Result<(), String> { + let token = self.next()?; + match &token.kind { + TokenKind::Keyword(ref k) if k == &keyword => Ok(()), + _ => Err(self.make_error(TokenKind::SemiColon, token)), + } + } + + pub(super) fn match_operator(&mut self) -> Result { + BinOp::try_from(self.next()?.kind) + } + pub(super) fn match_identifier(&mut self) -> Result { + match self.next()?.kind { + TokenKind::Identifier(n) => Ok(n), + other => Err(format!("Expected Identifier, found {:?}", other)), + } + } + + pub(super) fn make_error(&mut self, token_kind: TokenKind, other: Token) -> String { + match &self.raw { + Some(raw_file) => format!( + "Token {:?} not found, found {:?}\n{:?}", + token_kind, + other, + highlight_position_in_file(raw_file.to_string(), other.to_owned().pos) + ), + None => format!("Token {:?} not found, found {:?}", token_kind, other), + } + } + + pub(super) fn prev(&mut self) -> Option { + self.prev.clone() + } +} diff --git a/src/parser/rules.rs b/src/parser/rules.rs new file mode 100644 index 0000000..25b396d --- /dev/null +++ b/src/parser/rules.rs @@ -0,0 +1,276 @@ +use super::node_type::Statement; +use super::node_type::*; +use super::parser::Parser; +use crate::lexer::Keyword; +use crate::lexer::{Token, TokenKind, Value}; +use std::convert::TryFrom; + +impl Parser { + pub(super) fn parse_program(&mut self) -> Result { + let mut functions = Vec::new(); + let globals = Vec::new(); + + while self.has_more() { + functions.push(self.parse_function()?) + } + + Ok(Program { + func: functions, + globals: globals, + }) + } + + fn parse_block(&mut self) -> Result { + self.match_token(TokenKind::CurlyBracesOpen)?; + + let mut statements = vec![]; + + while let Err(_) = self.peek_token(TokenKind::CurlyBracesClose) { + let statement = self.parse_statement()?; + dbg!("{:?}", &statement); + statements.push(statement); + } + + self.match_token(TokenKind::CurlyBracesClose)?; + + Ok(Statement::Block(statements)) + } + + fn parse_function(&mut self) -> Result { + self.match_keyword(Keyword::Function)?; + let name = self.match_identifier()?; + + self.match_token(TokenKind::BraceOpen)?; + + let arguments: Vec = match self.peek()? { + t if t.kind == TokenKind::BraceClose => Vec::new(), + _ => self + .parse_arguments() + .expect("Failed to parse function arguments"), + }; + + self.match_token(TokenKind::BraceClose)?; + + let body = self.parse_block()?; + + Ok(Function { + name: name, + arguments: arguments, + body: body, + }) + } + + fn parse_arguments(&mut self) -> Result, String> { + let mut args = Vec::new(); + while let Err(_) = self.peek_token(TokenKind::BraceClose) { + let next = self.next()?; + match next.kind { + TokenKind::Identifier(name) => args.push(Variable { name: name }), + _ => return Err(self.make_error(TokenKind::Identifier("Argument".into()), next)), + } + } + + Ok(args) + } + + fn parse_statement(&mut self) -> Result { + let token = self.peek()?; + let state = match &token.kind { + TokenKind::Keyword(Keyword::Let) => self.parse_declare(), + TokenKind::Keyword(Keyword::Return) => self.parse_return(), + TokenKind::Keyword(Keyword::If) => self.parse_conditional_statement(), + TokenKind::Identifier(_) => { + let ident = self.match_identifier()?; + if let Ok(_) = self.peek_token(TokenKind::BraceOpen) { + let state = self.parse_function_call(Some(ident))?; + Ok(Statement::Exp(state)) + } else { + let state = Statement::Exp(Expression::Variable(ident.into())); + Ok(state) + } + } + TokenKind::Literal(_) => Ok(Statement::Exp(self.parse_expression()?)), + _ => return Err(self.make_error(TokenKind::Unknown, token)), + }; + state + } + + /// Parses a function call from tokens. + /// The name of the function needs to be passed here, because we have already passed it with our cursor. + /// If no function name is provided, the next token will be fetched + fn parse_function_call(&mut self, func_name: Option) -> Result { + let name = match func_name { + Some(name) => name, + None => self.next()?.raw, + }; + + self.match_token(TokenKind::BraceOpen)?; + + let mut args = Vec::new(); + + loop { + let next = self.peek()?; + match &next.kind { + TokenKind::BraceClose => break, + TokenKind::Comma => { + self.next(); + continue; + } + TokenKind::Identifier(_) | TokenKind::Literal(_) => { + args.push(self.parse_expression()?) + } + _ => { + return Err(self.make_error(TokenKind::BraceClose, next)); + } + }; + } + + self.match_token(TokenKind::BraceClose)?; + Ok(Expression::FunctionCall(name, args)) + } + + fn parse_return(&mut self) -> Result { + self.match_keyword(Keyword::Return)?; + let peeked = self.peek()?; + match peeked.kind { + TokenKind::SemiColon => Ok(Statement::Return(None)), + _ => Ok(Statement::Return(Some(self.parse_expression()?))), + } + } + + fn parse_expression(&mut self) -> Result { + let token = self.next()?; + match token.kind { + TokenKind::Literal(Value::Int) => { + let state = match BinOp::try_from(self.peek()?.kind) { + Ok(_) => self.parse_bin_op(None)?, + Err(_) => Expression::Int(token.raw.parse::().map_err(|e| e.to_string())?), + }; + Ok(state) + } + TokenKind::Literal(Value::Str) => { + let state = match BinOp::try_from(self.peek()?.kind) { + Ok(_) => self.parse_bin_op(None)?, + Err(_) => Expression::Str(token.raw), + }; + Ok(state) + } + TokenKind::Identifier(val) => { + let next = self.peek()?; + let state = match &next.kind { + TokenKind::BraceOpen => { + let func_call = self.parse_function_call(Some(val))?; + match BinOp::try_from(self.peek()?.kind) { + Ok(_) => self.parse_bin_op(Some(func_call))?, + Err(_) => func_call, + } + } + _ => match BinOp::try_from(self.peek()?.kind) { + Ok(_) => self.parse_bin_op(Some(Expression::Variable(token.raw)))?, + Err(_) => Expression::Variable(val), + }, + }; + Ok(state) + } + TokenKind::SquareBraceOpen => self.parse_array(), + other => Err(format!("Expected Expression, found {:?}", other)), + } + } + + fn parse_array(&mut self) -> Result { + let mut elements = Vec::new(); + loop { + let next = self.next()?; + match next.kind { + TokenKind::Literal(Value::Int) => { + let value = next.raw.parse::().map_err(|e| e.to_string())?; + elements.push(Expression::Int(value)); + } + _ => return Err(self.make_error(TokenKind::Identifier("Argument".into()), next)), + }; + if self.peek_token(TokenKind::SquareBraceClose).is_ok() { + break; + } + self.match_token(TokenKind::Comma)?; + } + + self.match_token(TokenKind::SquareBraceClose)?; + + Ok(Expression::Array(elements)) + } + + fn parse_conditional_statement(&mut self) -> Result { + self.match_keyword(Keyword::If)?; + let condition = self.parse_expression()?; + + let body = self.parse_block()?; + + match self.peek()? { + tok if tok.kind == TokenKind::Keyword(Keyword::Else) => { + self.next(); + + let peeked = self.peek()?; + + let has_else = match &peeked.kind { + TokenKind::CurlyBracesOpen => Some(self.parse_block()?), + _ => None, + }; + + let else_branch = match has_else { + Some(branch) => branch, + None => self.parse_conditional_statement()?, + }; + Ok(Statement::If( + condition, + Box::new(body), + Some(Box::new(else_branch)), + )) + } + _ => Ok(Statement::If(condition, Box::new(body), None)), + } + } + + /// In some occurences a complex expression has been evaluated before a binary operation is encountered. + /// The following expression is one such example: + /// ``` + /// foo(1) * 2 + /// ``` + /// In this case, the function call has already been evaluated, and needs to be passed to this function. + fn parse_bin_op(&mut self, lhs: Option) -> Result { + let left = match lhs { + Some(lhs) => lhs, + None => { + let prev = self.prev().ok_or_else(|| "Expected Token")?; + match &prev.kind { + TokenKind::Identifier(_) | TokenKind::Literal(_) => { + Ok(Expression::try_from(prev)?) + } + _ => Err(self.make_error(TokenKind::Unknown, prev)), + }? + } + }; + + let op = self.match_operator()?; + + Ok(Expression::BinOp( + Box::from(Expression::try_from(left).map_err(|e| e.to_string())?), + op, + Box::from(self.parse_expression()?), + )) + } + + fn parse_declare(&mut self) -> Result { + self.match_keyword(Keyword::Let)?; + match (self.next()?.kind, self.peek()?.kind) { + (TokenKind::Identifier(name), TokenKind::SemiColon) => { + Ok(Statement::Declare(Variable { name }, None)) + } + (TokenKind::Identifier(name), TokenKind::Assign) => { + self.drop(1); + let exp = self.parse_expression().ok(); + Ok(Statement::Declare(Variable { name }, exp)) + } + other => Err(format!("Expected identifier, found {:?}", other)), + } + } +} diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 7c6b2b9..3be2a1f 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -1,3 +1,4 @@ +use crate::lexer::*; /** * Copyright 2020 Garrit Franke * @@ -13,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -use crate::lexer::*; -use crate::parser::*; +use crate::parser::parse; #[test] fn test_parse_empty_function() {