Browse Source

Add token positions

github-actions
Garrit Franke 3 years ago
parent
commit
b0fe2b0474
  1. 14
      src/lexer/cursor.rs
  2. 19
      src/lexer/mod.rs
  3. 42
      src/lexer/tests.rs

14
src/lexer/cursor.rs

@@ -5,7 +5,8 @@ use std::str::Chars;
/// Next characters can be peeked via `nth_char` method, /// Next characters can be peeked via `nth_char` method,
/// and position can be shifted forward via `bump` method. /// and position can be shifted forward via `bump` method.
pub(crate) struct Cursor<'a> { pub(crate) struct Cursor<'a> {
initial_len: usize, initial_length: usize,
len: usize,
chars: Chars<'a>, chars: Chars<'a>,
prev: char, prev: char,
} }
@@ -13,9 +14,10 @@ pub(crate) struct Cursor<'a> {
pub(crate) const EOF_CHAR: char = '\0'; pub(crate) const EOF_CHAR: char = '\0';
impl<'a> Cursor<'a> { impl<'a> Cursor<'a> {
pub(crate) fn new(input: &'a str) -> Cursor<'a> { pub(crate) fn new(input: &'a str, initial_len: usize) -> Cursor<'a> {
Cursor { Cursor {
initial_len: input.len(), initial_length: initial_len,
len: input.len(),
chars: input.chars(), chars: input.chars(),
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
prev: EOF_CHAR, prev: EOF_CHAR,
@@ -56,7 +58,7 @@ impl<'a> Cursor<'a> {
/// Returns amount of already consumed symbols. /// Returns amount of already consumed symbols.
pub(crate) fn len_consumed(&self) -> usize { pub(crate) fn len_consumed(&self) -> usize {
self.initial_len - self.chars.as_str().len() self.len - self.chars.as_str().len()
} }
/// Returns a `Chars` iterator over the remaining characters. /// Returns a `Chars` iterator over the remaining characters.
@@ -64,6 +66,10 @@ impl<'a> Cursor<'a> {
self.chars.clone() self.chars.clone()
} }
pub(crate) fn pos(&self) -> usize {
self.initial_length - self.len
}
/// Moves to the next character. /// Moves to the next character.
pub(crate) fn bump(&mut self) -> Option<char> { pub(crate) fn bump(&mut self) -> Option<char> {
let c = self.chars.next()?; let c = self.chars.next()?;

19
src/lexer/mod.rs

@@ -11,11 +11,17 @@ pub struct Token {
pub kind: TokenKind, pub kind: TokenKind,
pub len: usize, pub len: usize,
pub raw: String, pub raw: String,
pub pos: usize,
} }
impl Token { impl Token {
fn new(kind: TokenKind, len: usize, raw: String) -> Token { fn new(kind: TokenKind, len: usize, raw: String, pos: usize) -> Token {
Token { kind, len, raw } Token {
kind,
len,
raw,
pos,
}
} }
} }
@@ -89,11 +95,12 @@ pub enum Keyword {
/// Creates an iterator that produces tokens from the input string. /// Creates an iterator that produces tokens from the input string.
pub fn tokenize(mut input: &str) -> Vec<Token> { pub fn tokenize(mut input: &str) -> Vec<Token> {
let mut initial_length = input.len();
std::iter::from_fn(move || { std::iter::from_fn(move || {
if input.is_empty() { if input.is_empty() {
return None; return None;
} }
let token = first_token(input); let token = first_token(input, initial_length);
input = &input[token.len..]; input = &input[token.len..];
Some(token) Some(token)
}) })
@@ -101,9 +108,9 @@ pub fn tokenize(mut input: &str) -> Vec<Token> {
} }
/// Parses the first token from the provided input string. /// Parses the first token from the provided input string.
pub fn first_token(input: &str) -> Token { pub fn first_token(input: &str, initial_len: usize) -> Token {
debug_assert!(!input.is_empty()); debug_assert!(!input.is_empty());
Cursor::new(input).advance_token() Cursor::new(input, initial_len).advance_token()
} }
pub fn is_whitespace(c: char) -> bool { pub fn is_whitespace(c: char) -> bool {
@@ -179,7 +186,7 @@ impl Cursor<'_> {
let mut raw = original_chars2.collect::<String>(); let mut raw = original_chars2.collect::<String>();
// Cut the original tokens to the length of the token // Cut the original tokens to the length of the token
raw.truncate(len); raw.truncate(len);
Token::new(token_kind, len, raw) Token::new(token_kind, len, raw, self.pos())
} }
/// Eats symbols while predicate returns true or until the end of file is reached. /// Eats symbols while predicate returns true or until the end of file is reached.

42
src/lexer/tests.rs

@@ -4,14 +4,16 @@ mod tests {
#[test] #[test]
fn test_basic_tokenizing() { fn test_basic_tokenizing() {
let mut tokens = tokenize("1 = 2").into_iter(); let raw = tokenize("1 = 2");
let mut tokens = raw.into_iter();
assert_eq!( assert_eq!(
tokens.nth(0).unwrap(), tokens.nth(0).unwrap(),
Token { Token {
len: 1, len: 1,
kind: TokenKind::Literal(Value::Int), kind: TokenKind::Literal(Value::Int),
raw: "1".to_owned() raw: "1".to_owned(),
pos: 0
} }
); );
@@ -20,7 +22,8 @@ mod tests {
Token { Token {
len: 1, len: 1,
kind: TokenKind::Whitespace, kind: TokenKind::Whitespace,
raw: " ".to_owned() raw: " ".to_owned(),
pos: 1
} }
); );
@@ -29,7 +32,8 @@ mod tests {
Token { Token {
len: 1, len: 1,
kind: TokenKind::Assign, kind: TokenKind::Assign,
raw: "=".to_owned() raw: "=".to_owned(),
pos: 2
} }
); );
@@ -38,7 +42,8 @@ mod tests {
Token { Token {
len: 1, len: 1,
kind: TokenKind::Whitespace, kind: TokenKind::Whitespace,
raw: " ".to_owned() raw: " ".to_owned(),
pos: 3
} }
); );
@@ -47,7 +52,8 @@ mod tests {
Token { Token {
len: 1, len: 1,
kind: TokenKind::Literal(Value::Int), kind: TokenKind::Literal(Value::Int),
raw: "2".to_owned() raw: "2".to_owned(),
pos: 4
} }
); );
} }
@@ -61,7 +67,8 @@ mod tests {
Token { Token {
len: 1, len: 1,
kind: TokenKind::Literal(Value::Int), kind: TokenKind::Literal(Value::Int),
raw: "1".to_owned() raw: "1".to_owned(),
pos: 0
} }
); );
@@ -70,7 +77,8 @@ mod tests {
Token { Token {
len: 1, len: 1,
kind: TokenKind::Assign, kind: TokenKind::Assign,
raw: "=".to_owned() raw: "=".to_owned(),
pos: 1
} }
); );
@@ -79,7 +87,8 @@ mod tests {
Token { Token {
len: 1, len: 1,
kind: TokenKind::Literal(Value::Int), kind: TokenKind::Literal(Value::Int),
raw: "2".to_owned() raw: "2".to_owned(),
pos: 2
} }
); );
} }
@@ -93,7 +102,8 @@ mod tests {
Token { Token {
len: 4, len: 4,
kind: TokenKind::Keyword(Keyword::Boolean), kind: TokenKind::Keyword(Keyword::Boolean),
raw: "true".to_owned() raw: "true".to_owned(),
pos: 0
} }
); );
@@ -102,7 +112,8 @@ mod tests {
Token { Token {
len: 5, len: 5,
kind: TokenKind::Keyword(Keyword::Boolean), kind: TokenKind::Keyword(Keyword::Boolean),
raw: "false".to_owned() raw: "false".to_owned(),
pos: 5
} }
); );
} }
@@ -116,7 +127,8 @@ mod tests {
Token { Token {
len: 2, len: 2,
kind: TokenKind::Keyword(Keyword::Function), kind: TokenKind::Keyword(Keyword::Function),
raw: "fn".to_owned() raw: "fn".to_owned(),
pos: 0
} }
); );
} }
@@ -125,8 +137,8 @@ mod tests {
fn test_comments() { fn test_comments() {
let mut tokens = tokenize( let mut tokens = tokenize(
" "
// foo // foo
fn fib() {} fn fib() {}
", ",
) )
.into_iter() .into_iter()
@@ -142,6 +154,7 @@ mod tests {
len: 6, len: 6,
kind: TokenKind::Comment, kind: TokenKind::Comment,
raw: "// foo".to_owned(), raw: "// foo".to_owned(),
pos: 1
} }
); );
@@ -151,6 +164,7 @@ mod tests {
len: 2, len: 2,
kind: TokenKind::Keyword(Keyword::Function), kind: TokenKind::Keyword(Keyword::Function),
raw: "fn".to_owned(), raw: "fn".to_owned(),
pos: 8
} }
); );
} }

Loading…
Cancel
Save