Browse Source

Add token positions

github-actions
Garrit Franke 3 years ago
parent
commit
b0fe2b0474
  1. 14
      src/lexer/cursor.rs
  2. 19
      src/lexer/mod.rs
  3. 42
      src/lexer/tests.rs

14
src/lexer/cursor.rs

@ -5,7 +5,8 @@ use std::str::Chars;
/// The next characters can be peeked via the `nth_char` method,
/// and the position can be shifted forward via the `bump` method.
pub(crate) struct Cursor<'a> {
initial_len: usize,
initial_length: usize,
len: usize,
chars: Chars<'a>,
prev: char,
}
@ -13,9 +14,10 @@ pub(crate) struct Cursor<'a> {
pub(crate) const EOF_CHAR: char = '\0';
impl<'a> Cursor<'a> {
pub(crate) fn new(input: &'a str) -> Cursor<'a> {
pub(crate) fn new(input: &'a str, initial_len: usize) -> Cursor<'a> {
Cursor {
initial_len: input.len(),
initial_length: initial_len,
len: input.len(),
chars: input.chars(),
#[cfg(debug_assertions)]
prev: EOF_CHAR,
@ -56,7 +58,7 @@ impl<'a> Cursor<'a> {
/// Returns the number of bytes of the input that have already been consumed.
pub(crate) fn len_consumed(&self) -> usize {
self.initial_len - self.chars.as_str().len()
self.len - self.chars.as_str().len()
}
/// Returns a `Chars` iterator over the remaining characters.
@ -64,6 +66,10 @@ impl<'a> Cursor<'a> {
self.chars.clone()
}
/// Returns the difference between the `initial_length` handed to `new` and
/// the byte length of this cursor's own input (`len`).
///
/// When `initial_length` is the byte length of the complete source and this
/// cursor was created over the not-yet-tokenized tail of that source (which
/// is how `tokenize` drives it), the result is the byte offset at which this
/// cursor's input begins within the complete source.
///
/// Assumes `initial_length >= len`; otherwise the subtraction overflows.
pub(crate) fn pos(&self) -> usize {
    let total_len = self.initial_length;
    let own_len = self.len;
    total_len - own_len
}
/// Moves to the next character.
pub(crate) fn bump(&mut self) -> Option<char> {
let c = self.chars.next()?;

19
src/lexer/mod.rs

@ -11,11 +11,17 @@ pub struct Token {
pub kind: TokenKind,
pub len: usize,
pub raw: String,
pub pos: usize,
}
impl Token {
fn new(kind: TokenKind, len: usize, raw: String) -> Token {
Token { kind, len, raw }
fn new(kind: TokenKind, len: usize, raw: String, pos: usize) -> Token {
Token {
kind,
len,
raw,
pos,
}
}
}
@ -89,11 +95,12 @@ pub enum Keyword {
/// Tokenizes the input string and returns the resulting tokens as a `Vec`.
pub fn tokenize(mut input: &str) -> Vec<Token> {
let mut initial_length = input.len();
std::iter::from_fn(move || {
if input.is_empty() {
return None;
}
let token = first_token(input);
let token = first_token(input, initial_length);
input = &input[token.len..];
Some(token)
})
@ -101,9 +108,9 @@ pub fn tokenize(mut input: &str) -> Vec<Token> {
}
/// Parses the first token from the provided input string.
pub fn first_token(input: &str) -> Token {
pub fn first_token(input: &str, initial_len: usize) -> Token {
debug_assert!(!input.is_empty());
Cursor::new(input).advance_token()
Cursor::new(input, initial_len).advance_token()
}
pub fn is_whitespace(c: char) -> bool {
@ -179,7 +186,7 @@ impl Cursor<'_> {
let mut raw = original_chars2.collect::<String>();
// Truncate the collected original input to the length of this token
raw.truncate(len);
Token::new(token_kind, len, raw)
Token::new(token_kind, len, raw, self.pos())
}
/// Eats symbols while predicate returns true or until the end of file is reached.

42
src/lexer/tests.rs

@ -4,14 +4,16 @@ mod tests {
#[test]
fn test_basic_tokenizing() {
let mut tokens = tokenize("1 = 2").into_iter();
let raw = tokenize("1 = 2");
let mut tokens = raw.into_iter();
assert_eq!(
tokens.nth(0).unwrap(),
Token {
len: 1,
kind: TokenKind::Literal(Value::Int),
raw: "1".to_owned()
raw: "1".to_owned(),
pos: 0
}
);
@ -20,7 +22,8 @@ mod tests {
Token {
len: 1,
kind: TokenKind::Whitespace,
raw: " ".to_owned()
raw: " ".to_owned(),
pos: 1
}
);
@ -29,7 +32,8 @@ mod tests {
Token {
len: 1,
kind: TokenKind::Assign,
raw: "=".to_owned()
raw: "=".to_owned(),
pos: 2
}
);
@ -38,7 +42,8 @@ mod tests {
Token {
len: 1,
kind: TokenKind::Whitespace,
raw: " ".to_owned()
raw: " ".to_owned(),
pos: 3
}
);
@ -47,7 +52,8 @@ mod tests {
Token {
len: 1,
kind: TokenKind::Literal(Value::Int),
raw: "2".to_owned()
raw: "2".to_owned(),
pos: 4
}
);
}
@ -61,7 +67,8 @@ mod tests {
Token {
len: 1,
kind: TokenKind::Literal(Value::Int),
raw: "1".to_owned()
raw: "1".to_owned(),
pos: 0
}
);
@ -70,7 +77,8 @@ mod tests {
Token {
len: 1,
kind: TokenKind::Assign,
raw: "=".to_owned()
raw: "=".to_owned(),
pos: 1
}
);
@ -79,7 +87,8 @@ mod tests {
Token {
len: 1,
kind: TokenKind::Literal(Value::Int),
raw: "2".to_owned()
raw: "2".to_owned(),
pos: 2
}
);
}
@ -93,7 +102,8 @@ mod tests {
Token {
len: 4,
kind: TokenKind::Keyword(Keyword::Boolean),
raw: "true".to_owned()
raw: "true".to_owned(),
pos: 0
}
);
@ -102,7 +112,8 @@ mod tests {
Token {
len: 5,
kind: TokenKind::Keyword(Keyword::Boolean),
raw: "false".to_owned()
raw: "false".to_owned(),
pos: 5
}
);
}
@ -116,7 +127,8 @@ mod tests {
Token {
len: 2,
kind: TokenKind::Keyword(Keyword::Function),
raw: "fn".to_owned()
raw: "fn".to_owned(),
pos: 0
}
);
}
@ -125,8 +137,8 @@ mod tests {
fn test_comments() {
let mut tokens = tokenize(
"
// foo
fn fib() {}
// foo
fn fib() {}
",
)
.into_iter()
@ -142,6 +154,7 @@ mod tests {
len: 6,
kind: TokenKind::Comment,
raw: "// foo".to_owned(),
pos: 1
}
);
@ -151,6 +164,7 @@ mod tests {
len: 2,
kind: TokenKind::Keyword(Keyword::Function),
raw: "fn".to_owned(),
pos: 8
}
);
}

Loading…
Cancel
Save