rat/src/lexer.rs
2026-05-21 17:39:43 +02:00

76 lines
No EOL
2.2 KiB
Rust

use crate::token::Token;
/// Byte-oriented lexer state: the input being scanned plus a cursor into it.
pub struct Lexer {
    /// Raw bytes of the input text (copied from the `&str` given to `Lexer::new`).
    source: Vec<u8>,
    /// Index into `source` of the next byte to examine.
    pos: usize,
}
impl Lexer {
    /// Creates a lexer over a private copy of `source`, positioned at its first byte.
    pub fn new(source: &str) -> Self {
        Self {
            source: source.as_bytes().to_vec(),
            pos: 0,
        }
    }

    /// Advances the cursor past any run of ASCII whitespace.
    fn skip_whitespace(&mut self) {
        while matches!(self.peek(), Some(byte) if byte.is_ascii_whitespace()) {
            self.advance();
        }
    }

    /// Moves the cursor forward by one byte.
    fn advance(&mut self) {
        self.pos += 1;
    }

    /// Returns the byte under the cursor without consuming it, or `None` once
    /// the cursor is at or past the end of the input.
    fn peek(&self) -> Option<u8> {
        self.source.get(self.pos).copied()
    }

    /// Consumes a run of ASCII digits and turns it into a `Token::Number`.
    ///
    /// Returns `None` when the digit run cannot be parsed into the numeric
    /// type carried by `Token::Number` (for instance when the literal is too
    /// large for that type, or when there were no digits at all). The digits
    /// have already been consumed in that case.
    fn number(&mut self) -> Option<Token> {
        let mut digits = String::new();
        while let Some(digit) = self.peek().filter(|byte| byte.is_ascii_digit()) {
            digits.push(char::from(digit));
            self.advance();
        }
        // The literal comes straight from user input, so a parse failure must
        // not panic; report it the same way `next` reports unlexable input.
        let value = digits.parse().ok()?;
        Some(Token::Number(value))
    }

    /// Consumes a run of ASCII letters and returns it as a `Token::Ident`.
    fn ident(&mut self) -> Option<Token> {
        let mut name = String::new();
        while let Some(letter) = self.peek().filter(|byte| byte.is_ascii_alphabetic()) {
            name.push(char::from(letter));
            // Consume the letter; without this the loop would never terminate.
            self.advance();
        }
        Some(Token::Ident(name))
    }

    /// Skips leading whitespace and returns the next token.
    ///
    /// Returns `None` at end of input. It also returns `None`, without
    /// consuming anything, when the next byte does not start any known token,
    /// so callers cannot tell these two situations apart from the return
    /// value alone.
    pub fn next(&mut self) -> Option<Token> {
        self.skip_whitespace();
        let token = match self.peek()? {
            // Multi-byte tokens consume their own input.
            b'0'..=b'9' => return self.number(),
            b'a'..=b'z' | b'A'..=b'Z' => return self.ident(),
            b'+' => Token::Plus,
            b'-' => Token::Minus,
            b'*' => Token::Asterisk,
            b'/' => Token::Slash,
            b'%' => Token::Percent,
            b'^' => Token::Caret,
            b'(' => Token::LParen,
            b')' => Token::RParen,
            b'[' => Token::LBracket,
            b']' => Token::RBracket,
            b'{' => Token::LBrace,
            b'}' => Token::RBrace,
            b';' => Token::Semicolon,
            // Unrecognised byte: leave the cursor where it is.
            _ => return None,
        };
        // Every remaining token is exactly one byte wide.
        self.advance();
        Some(token)
    }
}