added keywords, LexError

This commit is contained in:
Raptorox 2026-06-17 18:08:02 +02:00
parent 8aeb6e543e
commit a21b119e58
No known key found for this signature in database
GPG key ID: 8B3556FC3ED1F6D8
4 changed files with 133 additions and 25 deletions

View file

@ -17,4 +17,25 @@ impl Display for ParseError {
}
}
impl Error for ParseError {}
impl Error for ParseError {}
/// Error value that carries a human-readable message.
///
/// Its `Display` output is the message prefixed with `lex error: `.
#[derive(Debug)]
pub struct LexError {
    /// Description of what went wrong.
    message: String,
}

impl LexError {
    /// Builds an error from anything convertible into a `String`.
    pub fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        LexError { message }
    }
}

impl Display for LexError {
    /// Writes `lex error: ` followed by the stored message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("lex error: ")?;
        f.write_str(&self.message)
    }
}

// No source error is wrapped, so the default `Error` methods are sufficient.
impl Error for LexError {}

View file

@ -1,4 +1,6 @@
use crate::token::Token;
use crate::{error::LexError, token::Token};
type LexResult<T> = Result<T, LexError>;
pub struct Lexer {
source: Vec<u8>,
@ -27,7 +29,7 @@ impl Lexer {
self.source.get(self.pos).copied()
}
fn number(&mut self) -> Option<Token> {
fn number(&mut self) -> LexResult<Token> {
let mut num = String::new();
while matches!(self.peek(), Some(b'0' ..= b'9')) {
@ -35,42 +37,97 @@ impl Lexer {
self.advance();
}
Some(Token::Number(num.parse().unwrap()))
Ok(Token::Number(num.parse().unwrap()))
}
fn ident(&mut self) -> Option<Token> {
fn ident(&mut self) -> LexResult<Token> {
let mut ident = String::new();
while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z')) {
while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z' | b'0' ..= b'9' | b'_')) {
ident.push(self.peek().unwrap() as char);
self.advance();
}
Some(Token::Ident(ident))
let tok = match ident.as_str() {
"mov" => Token::Mov,
"if" => Token::If,
"else" => Token::Else,
"loop" => Token::Loop,
"while" => Token::While,
"for" => Token::For,
"in" => Token::In,
"fun" => Token::Fun,
"call" => Token::Call,
"break" => Token::Break,
"continue" => Token::Continue,
"true" => Token::True,
"false" => Token::False,
"eq" => Token::Eq,
"new" => Token::Neq,
"lt" => Token::Lt,
"gt" => Token::Gt,
"lte" => Token::Lte,
"gte" => Token::Gte,
_ => Token::Ident(ident)
};
Ok(tok)
}
pub fn next(&mut self) -> Option<Token> {
/// Lexes a double-quoted string literal.
///
/// Expects the current byte to be the opening `"`. Consumes everything up to
/// and including the closing `"` and returns the text in between as
/// `Token::StringLit`. No escape sequences are interpreted.
///
/// # Errors
///
/// Returns a `LexError` if the input ends before a closing `"` is found, or
/// if the bytes between the quotes are not valid UTF-8.
fn string(&mut self) -> LexResult<Token> {
    // Step over the opening quote.
    self.advance();

    // Collect raw bytes and decode once at the end: casting each `u8` to
    // `char` would split a multi-byte UTF-8 sequence into several unrelated
    // characters. The byte `"` never occurs inside a multi-byte sequence, so
    // scanning for it byte-wise is safe.
    let mut bytes = Vec::new();
    loop {
        match self.peek() {
            Some(b'"') => {
                self.advance();
                break;
            }
            Some(byte) => {
                bytes.push(byte);
                self.advance();
            }
            None => return Err(LexError::new("expected closing \"")),
        }
    }

    String::from_utf8(bytes)
        .map(Token::StringLit)
        .map_err(|_| LexError::new("string literal is not valid UTF-8"))
}
/// Produces the next token from the input.
///
/// Calls `skip_whitespace` first, then dispatches on the current byte:
/// digits go to `number`, ASCII letters to `ident`, `"` to `string`, `..`
/// becomes `Token::DotDot`, and the single-character operators, brackets and
/// separators map directly to their tokens. At end of input `Token::EOF` is
/// returned.
///
/// # Errors
///
/// Returns a `LexError` when a `.` is not followed by a second `.`, when a
/// byte does not start any known token, or when one of the helper lexers
/// fails.
pub fn next(&mut self) -> LexResult<Token> {
    self.skip_whitespace();

    let byte = match self.peek() {
        Some(byte) => byte,
        // Only a genuine end of input is reported as EOF.
        None => return Ok(Token::EOF),
    };

    let tok = match byte {
        // Multi-byte tokens are handled by dedicated helpers that do their
        // own advancing.
        b'0'..=b'9' => return self.number(),
        b'a'..=b'z' | b'A'..=b'Z' => return self.ident(),
        b'"' => return self.string(),
        b'.' => {
            self.advance();
            return match self.peek() {
                Some(b'.') => {
                    self.advance();
                    Ok(Token::DotDot)
                }
                c => Err(LexError::new(format!(
                    "unexpected character after '.': {c:?}"
                ))),
            };
        }

        // Operators
        b'+' => Token::Plus,
        b'-' => Token::Minus,
        b'*' => Token::Asterisk,
        b'/' => Token::Slash,
        b'%' => Token::Percent,
        b'^' => Token::Caret,

        // Brackets
        b'(' => Token::LParen,
        b')' => Token::RParen,
        b'[' => Token::LBracket,
        b']' => Token::RBracket,
        b'{' => Token::LBrace,
        b'}' => Token::RBrace,

        // Separators
        b';' => Token::Semicolon,
        b',' => Token::Comma,

        other => {
            // Previously any unknown byte was reported as EOF, which silently
            // dropped the rest of the input. Step past the byte so a caller
            // that keeps lexing after the error still makes progress.
            self.advance();
            return Err(LexError::new(format!(
                "unexpected character: '{}'",
                other.escape_ascii()
            )));
        }
    };

    // Every arm that falls through matched exactly one byte.
    self.advance();
    Ok(tok)
}
}

View file

@ -21,7 +21,8 @@ fn main() -> std::io::Result<()> {
let mut lexer = Lexer::new(&source);
let mut tokens = Vec::new();
while let Some(tok) = lexer.next() {
while let Ok(tok) = lexer.next() {
if tok == token::Token::EOF { break; }
print!("{tok:?}, ");
tokens.push(tok);
}

View file

@ -2,6 +2,7 @@
pub enum Token {
Number(i64),
Ident(String),
StringLit(String),
// Operators
Plus,
@ -19,5 +20,33 @@ pub enum Token {
LBracket,
RBracket,
Semicolon
// Separators
Semicolon,
Comma,
// Keywords
Mov,
If,
Else,
Loop,
While,
For,
In,
Fun,
Call,
Break,
Continue,
True,
False,
DotDot,
// Conditionals
Eq,
Neq,
Lt,
Gt,
Lte,
Gte,
EOF
}