From a21b119e580e692b8796cc1220706b65167ee07f Mon Sep 17 00:00:00 2001
From: Raptorox <70806316+Raptorox@users.noreply.github.com>
Date: Wed, 17 Jun 2026 18:08:02 +0200
Subject: [PATCH] added keywords, LexError

Lexer:
- `next`, `number`, `ident` and the new `string` return `LexResult`
  (`Result<Token, LexError>`) instead of `Option<Token>`.
- Identifiers may contain digits and `_` after the first letter, and the
  identifier loop now advances the cursor.
- Reserved words and the comparison mnemonics (`eq`, `neq`, `lt`, `gt`,
  `lte`, `gte`) map to dedicated tokens.
- New tokens: string literals, `,` and `..`.
- End of input yields `Token::EOF`; a byte that cannot start a token and a
  number literal that does not fit in `i64` are reported as `LexError`.

main: stop at `Token::EOF` and propagate lex errors as `io::Error`.

---
 src/error.rs |  25 ++++++++++-
 src/lexer.rs | 110 +++++++++++++++++++++++++++++++++++++++++----------
 src/main.rs  |   8 +++-
 src/token.rs |  31 +++++++++++++-
 4 files changed, 149 insertions(+), 25 deletions(-)

diff --git a/src/error.rs b/src/error.rs
index 36e4d9e..a824679 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -17,4 +17,27 @@ impl Display for ParseError {
     }
 }
 
-impl Error for ParseError {}
\ No newline at end of file
+impl Error for ParseError {}
+
+
+
+/// Error produced by the lexer when the source text cannot be tokenized.
+#[derive(Debug)]
+pub struct LexError {
+    message: String
+}
+
+impl LexError {
+    /// Creates a lex error carrying the given human-readable message.
+    pub fn new(message: impl Into<String>) -> Self {
+        Self { message: message.into() }
+    }
+}
+
+impl Display for LexError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "lex error: {}", self.message)
+    }
+}
+
+impl Error for LexError {}
\ No newline at end of file
diff --git a/src/lexer.rs b/src/lexer.rs
index 32d806a..f862a54 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,4 +1,7 @@
-use crate::token::Token;
+use crate::{error::LexError, token::Token};
+
+/// Result of a lexing step: the next token, or the reason lexing failed.
+type LexResult = Result<Token, LexError>;
 
 pub struct Lexer {
     source: Vec<u8>,
@@ -27,7 +30,7 @@ impl Lexer {
         self.source.get(self.pos).copied()
     }
 
-    fn number(&mut self) -> Option<Token> {
+    fn number(&mut self) -> LexResult {
         let mut num = String::new();
 
         while matches!(self.peek(), Some(b'0' ..= b'9')) {
@@ -35,42 +38,105 @@ impl Lexer {
             self.advance();
         }
 
-        Some(Token::Number(num.parse().unwrap()))
+        // Only ASCII digits were collected, so parsing can fail solely on i64 overflow;
+        // report that as a lex error instead of panicking.
+        num.parse::<i64>()
+            .map(Token::Number)
+            .map_err(|e| LexError::new(format!("invalid number literal {num}: {e}")))
     }
 
-    fn ident(&mut self) -> Option<Token> {
+    fn ident(&mut self) -> LexResult {
         let mut ident = String::new();
 
-        while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z')) {
+        while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z' | b'0' ..= b'9' | b'_')) {
             ident.push(self.peek().unwrap() as char);
+            self.advance();
         }
 
-        Some(Token::Ident(ident))
+        let tok = match ident.as_str() {
+            "mov" => Token::Mov,
+            "if" => Token::If,
+            "else" => Token::Else,
+            "loop" => Token::Loop,
+            "while" => Token::While,
+            "for" => Token::For,
+            "in" => Token::In,
+            "fun" => Token::Fun,
+            "call" => Token::Call,
+            "break" => Token::Break,
+            "continue" => Token::Continue,
+            "true" => Token::True,
+            "false" => Token::False,
+
+            "eq" => Token::Eq,
+            "neq" => Token::Neq,
+            "lt" => Token::Lt,
+            "gt" => Token::Gt,
+            "lte" => Token::Lte,
+            "gte" => Token::Gte,
+
+            _ => Token::Ident(ident)
+        };
+
+        Ok(tok)
     }
 
-    pub fn next(&mut self) -> Option<Token> {
+    /// Lexes a double-quoted string literal; the cursor must be on the opening `"`.
+    /// No escapes are interpreted, and a missing closing quote is reported as an error.
+    fn string(&mut self) -> LexResult {
+        self.advance();
+        let mut s = String::new();
+
+        loop {
+            match self.peek() {
+                Some(b'"') => { self.advance(); break; },
+                Some(c) => { s.push(c as char); self.advance(); }
+                None => return Err(LexError::new("expected closing \"")),
+            }
+        }
+
+        Ok(Token::StringLit(s))
+    }
+
+    /// Returns the next token, `Token::EOF` once the input is exhausted, or a `LexError`
+    /// for input that cannot start any token.
+    pub fn next(&mut self) -> LexResult {
         self.skip_whitespace();
 
         match self.peek() {
             Some(b'0' ..= b'9') => self.number(),
             Some(b'a' ..= b'z' | b'A' ..= b'Z') => self.ident(),
-            Some(b'+') => {self.advance(); Some(Token::Plus)},
-            Some(b'-') => {self.advance(); Some(Token::Minus)},
-            Some(b'*') => {self.advance(); Some(Token::Asterisk)},
-            Some(b'/') => {self.advance(); Some(Token::Slash)},
-            Some(b'%') => {self.advance(); Some(Token::Percent)},
-            Some(b'^') => {self.advance(); Some(Token::Caret)},
+            Some(b'+') => {self.advance(); Ok(Token::Plus)},
+            Some(b'-') => {self.advance(); Ok(Token::Minus)},
+            Some(b'*') => {self.advance(); Ok(Token::Asterisk)},
+            Some(b'/') => {self.advance(); Ok(Token::Slash)},
+            Some(b'%') => {self.advance(); Ok(Token::Percent)},
+            Some(b'^') => {self.advance(); Ok(Token::Caret)},
 
-            Some(b'(') => {self.advance(); Some(Token::LParen)},
-            Some(b')') => {self.advance(); Some(Token::RParen)},
-            Some(b'[') => {self.advance(); Some(Token::LBracket)},
-            Some(b']') => {self.advance(); Some(Token::RBracket)},
-            Some(b'{') => {self.advance(); Some(Token::LBrace)},
-            Some(b'}') => {self.advance(); Some(Token::RBrace)},
+            Some(b'(') => {self.advance(); Ok(Token::LParen)},
+            Some(b')') => {self.advance(); Ok(Token::RParen)},
+            Some(b'[') => {self.advance(); Ok(Token::LBracket)},
+            Some(b']') => {self.advance(); Ok(Token::RBracket)},
+            Some(b'{') => {self.advance(); Ok(Token::LBrace)},
+            Some(b'}') => {self.advance(); Ok(Token::RBrace)},
 
-            Some(b';') => {self.advance(); Some(Token::Semicolon)}
-
-            _ => None
+            Some(b';') => {self.advance(); Ok(Token::Semicolon)},
+            Some(b',') => {self.advance(); Ok(Token::Comma)},
+            Some(b'.') => {
+                self.advance();
+
+                match self.peek() {
+                    Some(b'.') => { self.advance(); Ok(Token::DotDot) },
+                    c => Err(LexError::new(format!(
+                        "unexpected character after '.': {c:?}"
+                    )))
+                }
+            },
+            Some(b'"') => self.string(),
+
+            // Only a truly exhausted input is EOF; any other byte is an error, not a silent stop.
+            Some(c) => Err(LexError::new(format!("unexpected character: {:?}", c as char))),
+            None => Ok(Token::EOF)
         }
     }
 }
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 9405cc8..608fb86 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -21,7 +21,13 @@ fn main() -> std::io::Result<()> {
     let mut lexer = Lexer::new(&source);
     let mut tokens = Vec::new();
 
-    while let Some(tok) = lexer.next() {
+    loop {
+        let tok = match lexer.next() {
+            Ok(token::Token::EOF) => break,
+            Ok(tok) => tok,
+            // Surface lexing failures instead of silently truncating the token stream.
+            Err(err) => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err)),
+        };
         print!("{tok:?}, ");
         tokens.push(tok);
     }
diff --git a/src/token.rs b/src/token.rs
index c193fbe..f54fca0 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -2,6 +2,7 @@
 pub enum Token {
     Number(i64),
     Ident(String),
+    StringLit(String),
 
     // Operators
     Plus,
@@ -19,5 +20,33 @@ pub enum Token {
     LBracket,
     RBracket,
 
-    Semicolon
+    // Separators
+    Semicolon,
+    Comma,
+
+    // Keywords
+    Mov,
+    If,
+    Else,
+    Loop,
+    While,
+    For,
+    In,
+    Fun,
+    Call,
+    Break,
+    Continue,
+    True,
+    False,
+    DotDot,
+
+    // Conditionals
+    Eq,
+    Neq,
+    Lt,
+    Gt,
+    Lte,
+    Gte,
+
+    EOF
 }
\ No newline at end of file