diff --git a/examples/hello_world.sb b/examples/hello_world.sb
index 88d6854..1221350 100644
--- a/examples/hello_world.sb
+++ b/examples/hello_world.sb
@@ -1,5 +1,3 @@
-// This is a comment
-
 fn main() {
     let x = "Hello World";
     return x;
diff --git a/src/lexer/cursor.rs b/src/lexer/cursor.rs
index feb9301..0fe1914 100644
--- a/src/lexer/cursor.rs
+++ b/src/lexer/cursor.rs
@@ -1,3 +1,4 @@
+use crate::lexer::Position;
 use std::str::Chars;
 
 /// Peekable iterator over a char sequence.
@@ -6,7 +7,9 @@ use std::str::Chars;
 /// and position can be shifted forward via `bump` method.
 pub(crate) struct Cursor<'a> {
     initial_length: usize,
+    pos: &'a mut Position,
     len: usize,
     chars: Chars<'a>,
+    #[cfg(debug_assertions)]
     prev: char,
 }
 
@@ -14,13 +17,18 @@
 pub(crate) const EOF_CHAR: char = '\0';
 
 impl<'a> Cursor<'a> {
-    pub(crate) fn new(input: &'a str, initial_len: usize) -> Cursor<'a> {
+    pub(crate) fn new(
+        input: &'a str,
+        initial_len: usize,
+        position: &'a mut Position,
+    ) -> Cursor<'a> {
         Cursor {
             initial_length: initial_len,
             len: input.len(),
             chars: input.chars(),
             #[cfg(debug_assertions)]
             prev: EOF_CHAR,
+            pos: position,
         }
     }
 
@@ -66,13 +74,26 @@
         self.chars.clone()
     }
 
-    pub(crate) fn pos(&self) -> usize {
-        self.initial_length - self.len
+    pub(crate) fn pos(&self) -> Position {
+        *self.pos
     }
 
     /// Moves to the next character.
     pub(crate) fn bump(&mut self) -> Option<char> {
         let c = self.chars.next()?;
+        // First character: move `raw` off its usize::MAX sentinel to index 0.
+        match self.pos.raw {
+            usize::MAX => self.pos.raw = 0,
+            _ => {
+                self.pos.raw += 1;
+                self.pos.offset += 1;
+            }
+        }
+
+        if c == '\n' {
+            self.pos.line += 1;
+            self.pos.offset = 0;
+        }
 
         #[cfg(debug_assertions)]
         {
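Note (illustrative, not part of the patch): `Cursor` now threads a `&'a mut Position` through `bump`, using `usize::MAX` as a "before the first character" sentinel for `raw` so that the first consumed character lands on index 0. A minimal standalone sketch of that update rule, with a hypothetical free function mirroring the logic above:

    #[derive(Debug, Clone, Copy)]
    struct Position {
        line: usize,
        offset: usize,
        raw: usize,
    }

    // Mirrors Cursor::bump: advance the raw index and the per-line offset,
    // then start a new line whenever a newline is consumed.
    fn bump(pos: &mut Position, c: char) {
        match pos.raw {
            usize::MAX => pos.raw = 0, // first character: move off the sentinel
            _ => {
                pos.raw += 1;
                pos.offset += 1;
            }
        }
        if c == '\n' {
            pos.line += 1;
            pos.offset = 0;
        }
    }

    fn main() {
        let mut pos = Position { line: 1, offset: 0, raw: usize::MAX };
        for c in "a\nb".chars() {
            bump(&mut pos, c);
            println!("{:?} -> {:?}", c, pos);
        }
        // 'a'  -> raw: 0, line: 1, offset: 0
        // '\n' -> raw: 1, line: 2, offset: 0
        // 'b'  -> raw: 2, line: 2, offset: 1
    }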
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index df4ce27..5af627f 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -11,11 +11,11 @@ pub struct Token {
     pub kind: TokenKind,
     pub len: usize,
     pub raw: String,
-    pub pos: usize,
+    pub pos: Position,
 }
 
 impl Token {
-    fn new(kind: TokenKind, len: usize, raw: String, pos: usize) -> Token {
+    fn new(kind: TokenKind, len: usize, raw: String, pos: Position) -> Token {
         Token {
             kind,
             len,
@@ -25,6 +25,13 @@ impl Token {
     }
 }
 
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub struct Position {
+    pub line: usize,
+    pub offset: usize,
+    pub raw: usize,
+}
+
 /// Enum representing common lexeme types.
 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum TokenKind {
@@ -95,12 +102,17 @@ pub enum Keyword {
 
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(mut input: &str) -> Vec<Token> {
-    let mut initial_length = input.len();
+    let initial_length = input.len();
+    let mut pos = Position {
+        raw: usize::MAX,
+        line: 1,
+        offset: 0,
+    };
     std::iter::from_fn(move || {
         if input.is_empty() {
             return None;
         }
-        let token = first_token(input, initial_length);
+        let token = first_token(input, initial_length, &mut pos);
         input = &input[token.len..];
         Some(token)
     })
@@ -108,9 +120,9 @@
 }
 
 /// Parses the first token from the provided input string.
-pub fn first_token(input: &str, initial_len: usize) -> Token {
+pub fn first_token(input: &str, initial_len: usize, pos: &mut Position) -> Token {
     debug_assert!(!input.is_empty());
-    Cursor::new(input, initial_len).advance_token()
+    Cursor::new(input, initial_len, pos).advance_token()
 }
 
 pub fn is_whitespace(c: char) -> bool {
@@ -186,7 +198,11 @@ impl Cursor<'_> {
         let mut raw = original_chars2.collect::<String>();
         // Cut the original tokens to the length of the token
         raw.truncate(len);
-        Token::new(token_kind, len, raw, self.pos())
+        let position = self.pos();
+        let token = Token::new(token_kind, len, raw, position);
+
+        dbg!(&token);
+        token
     }
 
     /// Eats symbols while predicate returns true or until the end of file is reached.
diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs
index ef6a1a3..b3ed511 100644
--- a/src/lexer/tests.rs
+++ b/src/lexer/tests.rs
@@ -13,7 +13,11 @@ mod tests {
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
                 raw: "1".to_owned(),
-                pos: 0
+                pos: Position {
+                    raw: 0,
+                    line: 1,
+                    offset: 0
+                }
             }
         );
 
@@ -23,7 +27,11 @@
                 len: 1,
                 kind: TokenKind::Whitespace,
                 raw: " ".to_owned(),
-                pos: 1
+                pos: Position {
+                    raw: 1,
+                    line: 1,
+                    offset: 1
+                }
             }
         );
 
@@ -33,7 +41,11 @@
                 len: 1,
                 kind: TokenKind::Assign,
                 raw: "=".to_owned(),
-                pos: 2
+                pos: Position {
+                    raw: 2,
+                    line: 1,
+                    offset: 2
+                }
             }
         );
 
@@ -43,7 +55,11 @@
                 len: 1,
                 kind: TokenKind::Whitespace,
                 raw: " ".to_owned(),
-                pos: 3
+                pos: Position {
+                    raw: 3,
+                    line: 1,
+                    offset: 3
+                }
             }
         );
 
@@ -53,7 +69,11 @@
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
                 raw: "2".to_owned(),
-                pos: 4
+                pos: Position {
+                    raw: 4,
+                    line: 1,
+                    offset: 4
+                }
             }
         );
     }
 
@@ -68,7 +88,11 @@
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
                 raw: "1".to_owned(),
-                pos: 0
+                pos: Position {
+                    raw: 0,
+                    line: 1,
+                    offset: 0
+                }
             }
         );
 
@@ -78,7 +102,11 @@
                 len: 1,
                 kind: TokenKind::Assign,
                 raw: "=".to_owned(),
-                pos: 1
+                pos: Position {
+                    raw: 1,
+                    line: 1,
+                    offset: 1
+                }
             }
         );
 
@@ -88,7 +116,11 @@
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
                 raw: "2".to_owned(),
-                pos: 2
+                pos: Position {
+                    raw: 2,
+                    line: 1,
+                    offset: 2
+                }
             }
         );
     }
 
@@ -103,7 +135,11 @@
                 len: 4,
                 kind: TokenKind::Keyword(Keyword::Boolean),
                 raw: "true".to_owned(),
-                pos: 0
+                pos: Position {
+                    raw: 3,
+                    line: 1,
+                    offset: 3
+                }
             }
         );
 
@@ -113,7 +149,11 @@
                 len: 5,
                 kind: TokenKind::Keyword(Keyword::Boolean),
                 raw: "false".to_owned(),
-                pos: 5
+                pos: Position {
+                    raw: 9,
+                    line: 1,
+                    offset: 9
+                }
             }
         );
     }
 
@@ -128,7 +168,11 @@
                 len: 2,
                 kind: TokenKind::Keyword(Keyword::Function),
                 raw: "fn".to_owned(),
-                pos: 0
+                pos: Position {
+                    raw: 1,
+                    line: 1,
+                    offset: 1
+                }
             }
         );
     }
 
@@ -136,8 +180,7 @@
     #[test]
     fn test_comments() {
         let mut tokens = tokenize(
-            "
-// foo
+            "// foo
 fn fib() {}
 ",
         )
@@ -154,7 +197,11 @@ fn fib() {}
                 len: 6,
                 kind: TokenKind::Comment,
                 raw: "// foo".to_owned(),
-                pos: 1
+                pos: Position {
+                    raw: 5,
+                    line: 1,
+                    offset: 5
+                }
             }
         );
 
@@ -164,7 +211,11 @@ fn fib() {}
                 len: 2,
                 kind: TokenKind::Keyword(Keyword::Function),
                 raw: "fn".to_owned(),
-                pos: 8
+                pos: Position {
+                    raw: 8,
+                    line: 2,
+                    offset: 2
+                }
             }
         );
     }
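Note (illustrative, not part of the patch): as the updated expectations show, a token's `pos` is sampled after its last character has been consumed, so it marks the token's end rather than its start: `true` (len 4) reports raw: 3, and the `fn` on line 2 of the comment test reports offset: 2. The start index can therefore be recovered as `pos.raw + 1 - len`. A hypothetical usage sketch, assuming it lives inside this crate:

    use crate::lexer::tokenize;

    fn print_spans(source: &str) {
        for token in tokenize(source) {
            // pos holds end-of-token coordinates; recover the start index.
            let start = token.pos.raw + 1 - token.len;
            println!(
                "{:?} spans {}..={} (line {}, offset {})",
                token.kind, start, token.pos.raw, token.pos.line, token.pos.offset
            );
        }
    }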
diff --git a/src/main.rs b/src/main.rs
index c65cac5..e006c2d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,6 +3,7 @@ use std::io::Read;
 
 mod lexer;
 mod parser;
+mod util;
 
 fn main() -> std::io::Result<()> {
     let mut file = File::open("examples/hello_world.sb")?;
@@ -12,7 +13,7 @@ fn main() -> std::io::Result<()> {
     let tokens = lexer::tokenize(&contents);
 
     // let ast = parser::parse(tokens.into_iter());
-    let program = parser::parse(tokens).unwrap();
+    let program = parser::parse(tokens, Some(contents));
 
     println!("{:#?}", program);
     Ok(())
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 7456b62..7918d13 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1,6 +1,7 @@
 use crate::lexer::Keyword;
 use crate::lexer::{Token, TokenKind, Value};
 use crate::parser::node_type::*;
+use crate::util::string_util::highlight_position_in_file;
 use std::iter::Peekable;
 use std::vec::IntoIter;
 
@@ -12,10 +13,11 @@ mod tests;
 
 pub struct Parser {
     tokens: Peekable<IntoIter<Token>>,
     peeked: Vec<Token>,
+    raw: Option<String>,
 }
 
 impl Parser {
-    pub fn new(tokens: Vec<Token>) -> Parser {
+    pub fn new(tokens: Vec<Token>, raw: Option<String>) -> Parser {
         // FIXME: Filter without collecting?
         let tokens_without_whitespace: Vec<Token> = tokens
             .into_iter()
@@ -24,6 +26,7 @@ impl Parser {
         Parser {
             tokens: tokens_without_whitespace.into_iter().peekable(),
             peeked: vec![],
+            raw,
         }
     }
 
@@ -71,10 +74,8 @@
     fn match_token(&mut self, token_kind: TokenKind) -> Result<Token, String> {
         match self.next() {
             Some(token) if token.kind == token_kind => Ok(token),
-            other => Err(format!(
-                "Token {:?} not found, found {:?}",
-                token_kind, other
-            )),
+            Some(other) => Err(self.make_error(token_kind, other)),
+            None => Err("Token expected".to_string()),
         }
     }
 
@@ -91,9 +92,9 @@
 
     fn match_keyword(&mut self, keyword: Keyword) -> Result<(), String> {
         let token = self.next_token();
-        match token.kind {
-            TokenKind::Keyword(k) if k == keyword => Ok(()),
-            other => Err(format!("Expected SemiColon, found {:?}", other)),
+        match &token.kind {
+            TokenKind::Keyword(ref k) if k == &keyword => Ok(()),
+            _ => Err(self.make_error(TokenKind::SemiColon, token)),
         }
     }
 
@@ -103,6 +104,18 @@
             other => Err(format!("Expected Identifier, found {:?}", other)),
         }
     }
+
+    fn make_error(&mut self, token_kind: TokenKind, other: Token) -> String {
+        match &self.raw {
+            Some(raw_file) => format!(
+                "Token {:?} not found, found {:?}\n{}",
+                token_kind,
+                other,
+                highlight_position_in_file(raw_file.to_string(), other.pos)
+            ),
+            None => format!("Token {:?} not found, found {:?}", token_kind, other),
+        }
+    }
 }
 
 impl Parser {
@@ -148,7 +161,7 @@
     fn parse_statement(&mut self) -> Result<Statement, String> {
         let token = self.next_token();
         dbg!(&token);
-        match token.kind {
+        match &token.kind {
             TokenKind::Keyword(Keyword::Let) => {
                 let state = self.parse_declare();
                 self.match_token(TokenKind::SemiColon)?;
@@ -161,7 +174,7 @@
                 Ok(state)
             }
-            other => Err(format!("Expected Statement, found {:?}", other)),
+            _ => Err(self.make_error(TokenKind::Unknown, token)),
         }
     }
 
@@ -202,8 +215,8 @@
     }
 }
 
-pub fn parse(tokens: Vec<Token>) -> Result<Program, String> {
-    let mut parser = Parser::new(tokens);
+pub fn parse(tokens: Vec<Token>, raw: Option<String>) -> Result<Program, String> {
+    let mut parser = Parser::new(tokens, raw);
     parser.parse()
 }
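Note (illustrative, not part of the patch): `make_error` only enriches the message when the parser was handed the raw source; with `None` it falls back to the plain "Token ... not found" text. A hypothetical end-to-end sketch, assuming it lives inside this crate:

    use crate::lexer::tokenize;
    use crate::parser::parse;

    fn report(raw: &str) {
        let tokens = tokenize(raw);
        // Passing Some(raw) lets make_error append the offending line via
        // highlight_position_in_file; passing None skips the snippet.
        match parse(tokens, Some(raw.to_string())) {
            Ok(program) => println!("{:#?}", program),
            Err(message) => eprintln!("{}", message),
        }
    }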
"; let tokens = tokenize(raw); - let tree = parse(tokens); + let tree = parse(tokens, Some(raw.to_string())); assert!(tree.is_err()) } @@ -38,7 +39,7 @@ fn test_parse_no_function_context() { let x = 1; "; let tokens = tokenize(raw); - let tree = parse(tokens); + let tree = parse(tokens, Some(raw.to_string())); assert!(tree.is_err()) } @@ -56,7 +57,7 @@ fn test_parse_multiple_functions() { } "; let tokens = tokenize(raw); - let tree = parse(tokens); + let tree = parse(tokens, Some(raw.to_string())); assert!(tree.is_ok()) } @@ -69,6 +70,6 @@ fn test_parse_variable_declaration() { } "; let tokens = tokenize(raw); - let tree = parse(tokens); + let tree = parse(tokens, Some(raw.to_string())); assert!(tree.is_ok()) } diff --git a/src/util/mod.rs b/src/util/mod.rs new file mode 100644 index 0000000..3edd819 --- /dev/null +++ b/src/util/mod.rs @@ -0,0 +1 @@ +pub mod string_util; diff --git a/src/util/string_util.rs b/src/util/string_util.rs new file mode 100644 index 0000000..204b990 --- /dev/null +++ b/src/util/string_util.rs @@ -0,0 +1,17 @@ +use crate::lexer::Position; + +pub fn highlight_position_in_file(input: String, position: Position) -> String { + // TODO: Chain without collecting in between + input + .chars() + .skip(position.raw) + .take_while(|c| c != &'\n') + .collect::() + .chars() + .rev() + .take_while(|c| c != &'\n') + .collect::() + .chars() + .rev() + .collect::() +}