From b0fe2b0474bb174cb55a9319e3ccd5f3e354d1b8 Mon Sep 17 00:00:00 2001
From: Garrit Franke
Date: Fri, 4 Dec 2020 11:57:46 +0100
Subject: [PATCH] Add token positions

---
 src/lexer/cursor.rs | 14 ++++++++++----
 src/lexer/mod.rs    | 19 +++++++++++++------
 src/lexer/tests.rs  | 42 ++++++++++++++++++++++++++++--------------
 3 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/src/lexer/cursor.rs b/src/lexer/cursor.rs
index 69a2de0..feb9301 100644
--- a/src/lexer/cursor.rs
+++ b/src/lexer/cursor.rs
@@ -5,7 +5,8 @@ use std::str::Chars;
 /// Next characters can be peeked via `nth_char` method,
 /// and position can be shifted forward via `bump` method.
 pub(crate) struct Cursor<'a> {
-    initial_len: usize,
+    initial_length: usize,
+    len: usize,
     chars: Chars<'a>,
     prev: char,
 }
@@ -13,9 +14,10 @@ pub(crate) const EOF_CHAR: char = '\0';
 
 impl<'a> Cursor<'a> {
-    pub(crate) fn new(input: &'a str) -> Cursor<'a> {
+    pub(crate) fn new(input: &'a str, initial_len: usize) -> Cursor<'a> {
         Cursor {
-            initial_len: input.len(),
+            initial_length: initial_len,
+            len: input.len(),
             chars: input.chars(),
             #[cfg(debug_assertions)]
             prev: EOF_CHAR,
         }
@@ -56,7 +58,7 @@ impl<'a> Cursor<'a> {
 
     /// Returns amount of already consumed symbols.
     pub(crate) fn len_consumed(&self) -> usize {
-        self.initial_len - self.chars.as_str().len()
+        self.len - self.chars.as_str().len()
     }
 
     /// Returns a `Chars` iterator over the remaining characters.
@@ -64,6 +66,10 @@ impl<'a> Cursor<'a> {
         self.chars.clone()
     }
 
+    pub(crate) fn pos(&self) -> usize {
+        self.initial_length - self.len
+    }
+
     /// Moves to the next character.
     pub(crate) fn bump(&mut self) -> Option<char> {
         let c = self.chars.next()?;
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 7e2c695..df4ce27 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -11,11 +11,17 @@ pub struct Token {
     pub kind: TokenKind,
     pub len: usize,
     pub raw: String,
+    pub pos: usize,
 }
 
 impl Token {
-    fn new(kind: TokenKind, len: usize, raw: String) -> Token {
-        Token { kind, len, raw }
+    fn new(kind: TokenKind, len: usize, raw: String, pos: usize) -> Token {
+        Token {
+            kind,
+            len,
+            raw,
+            pos,
+        }
     }
 }
 
@@ -89,11 +95,12 @@ pub enum Keyword {
 
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(mut input: &str) -> Vec<Token> {
+    let mut initial_length = input.len();
     std::iter::from_fn(move || {
         if input.is_empty() {
             return None;
         }
-        let token = first_token(input);
+        let token = first_token(input, initial_length);
         input = &input[token.len..];
         Some(token)
     })
@@ -101,9 +108,9 @@ pub fn tokenize(mut input: &str) -> Vec<Token> {
 }
 
 /// Parses the first token from the provided input string.
-pub fn first_token(input: &str) -> Token {
+pub fn first_token(input: &str, initial_len: usize) -> Token {
     debug_assert!(!input.is_empty());
-    Cursor::new(input).advance_token()
+    Cursor::new(input, initial_len).advance_token()
 }
 
 pub fn is_whitespace(c: char) -> bool {
@@ -179,7 +186,7 @@ impl Cursor<'_> {
         let mut raw = original_chars2.collect::<String>();
         // Cut the original tokens to the length of the token
         raw.truncate(len);
-        Token::new(token_kind, len, raw)
+        Token::new(token_kind, len, raw, self.pos())
     }
 
     /// Eats symbols while predicate returns true or until the end of file is reached.
diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs
index 584096b..ef6a1a3 100644
--- a/src/lexer/tests.rs
+++ b/src/lexer/tests.rs
@@ -4,14 +4,16 @@ mod tests {
 
     #[test]
     fn test_basic_tokenizing() {
-        let mut tokens = tokenize("1 = 2").into_iter();
+        let raw = tokenize("1 = 2");
+        let mut tokens = raw.into_iter();
 
         assert_eq!(
             tokens.nth(0).unwrap(),
             Token {
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
-                raw: "1".to_owned()
+                raw: "1".to_owned(),
+                pos: 0
             }
         );
 
@@ -20,7 +22,8 @@
             Token {
                 len: 1,
                 kind: TokenKind::Whitespace,
-                raw: " ".to_owned()
+                raw: " ".to_owned(),
+                pos: 1
             }
         );
 
@@ -29,7 +32,8 @@
             Token {
                 len: 1,
                 kind: TokenKind::Assign,
-                raw: "=".to_owned()
+                raw: "=".to_owned(),
+                pos: 2
             }
         );
 
@@ -38,7 +42,8 @@
             Token {
                 len: 1,
                 kind: TokenKind::Whitespace,
-                raw: " ".to_owned()
+                raw: " ".to_owned(),
+                pos: 3
             }
         );
 
@@ -47,7 +52,8 @@
             Token {
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
-                raw: "2".to_owned()
+                raw: "2".to_owned(),
+                pos: 4
             }
         );
     }
@@ -61,7 +67,8 @@
             Token {
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
-                raw: "1".to_owned()
+                raw: "1".to_owned(),
+                pos: 0
             }
        );
 
@@ -70,7 +77,8 @@
             Token {
                 len: 1,
                 kind: TokenKind::Assign,
-                raw: "=".to_owned()
+                raw: "=".to_owned(),
+                pos: 1
             }
         );
 
@@ -79,7 +87,8 @@
             Token {
                 len: 1,
                 kind: TokenKind::Literal(Value::Int),
-                raw: "2".to_owned()
+                raw: "2".to_owned(),
+                pos: 2
             }
         );
     }
@@ -93,7 +102,8 @@
             Token {
                 len: 4,
                 kind: TokenKind::Keyword(Keyword::Boolean),
-                raw: "true".to_owned()
+                raw: "true".to_owned(),
+                pos: 0
             }
         );
 
@@ -102,7 +112,8 @@
             Token {
                 len: 5,
                 kind: TokenKind::Keyword(Keyword::Boolean),
-                raw: "false".to_owned()
+                raw: "false".to_owned(),
+                pos: 5
             }
         );
     }
@@ -116,7 +127,8 @@
             Token {
                 len: 2,
                 kind: TokenKind::Keyword(Keyword::Function),
-                raw: "fn".to_owned()
+                raw: "fn".to_owned(),
+                pos: 0
             }
         );
     }
@@ -125,8 +137,8 @@
     fn test_comments() {
         let mut tokens = tokenize(
             "
-        // foo
-        fn fib() {}
+// foo
+fn fib() {}
         ",
         )
         .into_iter()
@@ -142,6 +154,7 @@
                 len: 6,
                 kind: TokenKind::Comment,
                 raw: "// foo".to_owned(),
+                pos: 1
             }
         );
 
@@ -151,6 +164,7 @@
                 len: 2,
                 kind: TokenKind::Keyword(Keyword::Function),
                 raw: "fn".to_owned(),
+                pos: 8
             }
         );
     }
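
Usage note, not part of the patch: a minimal sketch of how the new `pos`
field might be consumed downstream, for example to turn a token's byte
offset into a line/column pair for error messages. Only `tokenize` and
`Token::pos` come from the patch above; `line_col`, `source`, and the
module path are hypothetical names for illustration:

    use crate::lexer::tokenize;

    /// Hypothetical helper: convert a byte offset into a 1-based
    /// (line, column) pair by scanning the source text up to `pos`.
    fn line_col(source: &str, pos: usize) -> (usize, usize) {
        let before = &source[..pos];
        // Lines are counted by the newlines preceding `pos`; the column
        // is the distance to the previous newline (or to the start).
        let line = before.matches('\n').count() + 1;
        let col = pos - before.rfind('\n').map_or(0, |i| i + 1) + 1;
        (line, col)
    }

    fn main() {
        let source = "1 = 2";
        for token in tokenize(source) {
            // `token.pos` is the byte offset where the token starts,
            // i.e. initial_length - len at the time the cursor was made.
            let (line, col) = line_col(source, token.pos);
            println!("{:?} at {}:{}", token.raw, line, col);
        }
    }

Since `pos` is a byte offset, this helper slices at token boundaries,
which are always valid char boundaries in the input string.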