Compare commits

..

2 commits

Author SHA1 Message Date
Raptorox
a21b119e58
added keywords, LexError 2026-06-17 18:08:02 +02:00
Raptorox
8aeb6e543e
added ParseResult 2026-06-16 23:37:18 +02:00
5 changed files with 188 additions and 46 deletions

41
src/error.rs Normal file
View file

@ -0,0 +1,41 @@
use std::{error::Error, fmt::Display};
/// Error value that carries a human-readable message and renders it with a
/// `parse error: ` prefix.
#[derive(Debug)]
pub struct ParseError {
    /// Description of what went wrong, shown after the prefix.
    message: String,
}

impl ParseError {
    /// Creates an error from anything convertible into a `String`.
    pub fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl Display for ParseError {
    /// Writes `parse error: ` followed by the stored message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("parse error: ")?;
        f.write_str(&self.message)
    }
}

impl Error for ParseError {}
/// Error value that carries a human-readable message and renders it with a
/// `lex error: ` prefix.
#[derive(Debug)]
pub struct LexError {
    /// Description of what went wrong, shown after the prefix.
    message: String,
}

impl LexError {
    /// Creates an error from anything convertible into a `String`.
    pub fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl Display for LexError {
    /// Writes `lex error: ` followed by the stored message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("lex error: ")?;
        f.write_str(&self.message)
    }
}

impl Error for LexError {}

View file

@ -1,4 +1,6 @@
use crate::token::Token; use crate::{error::LexError, token::Token};
type LexResult<T> = Result<T, LexError>;
pub struct Lexer { pub struct Lexer {
source: Vec<u8>, source: Vec<u8>,
@ -27,7 +29,7 @@ impl Lexer {
self.source.get(self.pos).copied() self.source.get(self.pos).copied()
} }
fn number(&mut self) -> Option<Token> { fn number(&mut self) -> LexResult<Token> {
let mut num = String::new(); let mut num = String::new();
while matches!(self.peek(), Some(b'0' ..= b'9')) { while matches!(self.peek(), Some(b'0' ..= b'9')) {
@ -35,42 +37,97 @@ impl Lexer {
self.advance(); self.advance();
} }
Some(Token::Number(num.parse().unwrap())) Ok(Token::Number(num.parse().unwrap()))
} }
fn ident(&mut self) -> Option<Token> { fn ident(&mut self) -> LexResult<Token> {
let mut ident = String::new(); let mut ident = String::new();
while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z')) { while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z' | b'0' ..= b'9' | b'_')) {
ident.push(self.peek().unwrap() as char); ident.push(self.peek().unwrap() as char);
self.advance();
} }
Some(Token::Ident(ident)) let tok = match ident.as_str() {
"mov" => Token::Mov,
"if" => Token::If,
"else" => Token::Else,
"loop" => Token::Loop,
"while" => Token::While,
"for" => Token::For,
"in" => Token::In,
"fun" => Token::Fun,
"call" => Token::Call,
"break" => Token::Break,
"continue" => Token::Continue,
"true" => Token::True,
"false" => Token::False,
"eq" => Token::Eq,
"new" => Token::Neq,
"lt" => Token::Lt,
"gt" => Token::Gt,
"lte" => Token::Lte,
"gte" => Token::Gte,
_ => Token::Ident(ident)
};
Ok(tok)
} }
pub fn next(&mut self) -> Option<Token> { fn string(&mut self) -> LexResult<Token> {
self.advance();
let mut s = String::new();
loop {
match self.peek() {
Some(b'"') => { self.advance(); break; },
Some(c) => { s.push(c as char); self.advance(); }
None => { return Err(LexError::new(format!(
"expected closing \""
))); }
}
}
Ok(Token::StringLit(s))
}
pub fn next(&mut self) -> LexResult<Token> {
self.skip_whitespace(); self.skip_whitespace();
match self.peek() { match self.peek() {
Some(b'0' ..= b'9') => self.number(), Some(b'0' ..= b'9') => self.number(),
Some(b'a' ..= b'z' | b'A' ..= b'Z') => self.ident(), Some(b'a' ..= b'z' | b'A' ..= b'Z') => self.ident(),
Some(b'+') => {self.advance(); Some(Token::Plus)}, Some(b'+') => {self.advance(); Ok(Token::Plus)},
Some(b'-') => {self.advance(); Some(Token::Minus)}, Some(b'-') => {self.advance(); Ok(Token::Minus)},
Some(b'*') => {self.advance(); Some(Token::Asterisk)}, Some(b'*') => {self.advance(); Ok(Token::Asterisk)},
Some(b'/') => {self.advance(); Some(Token::Slash)}, Some(b'/') => {self.advance(); Ok(Token::Slash)},
Some(b'%') => {self.advance(); Some(Token::Percent)}, Some(b'%') => {self.advance(); Ok(Token::Percent)},
Some(b'^') => {self.advance(); Some(Token::Caret)}, Some(b'^') => {self.advance(); Ok(Token::Caret)},
Some(b'(') => {self.advance(); Some(Token::LParen)}, Some(b'(') => {self.advance(); Ok(Token::LParen)},
Some(b')') => {self.advance(); Some(Token::RParen)}, Some(b')') => {self.advance(); Ok(Token::RParen)},
Some(b'[') => {self.advance(); Some(Token::LBracket)}, Some(b'[') => {self.advance(); Ok(Token::LBracket)},
Some(b']') => {self.advance(); Some(Token::RBracket)}, Some(b']') => {self.advance(); Ok(Token::RBracket)},
Some(b'{') => {self.advance(); Some(Token::LBrace)}, Some(b'{') => {self.advance(); Ok(Token::LBrace)},
Some(b'}') => {self.advance(); Some(Token::RBrace)}, Some(b'}') => {self.advance(); Ok(Token::RBrace)},
Some(b';') => {self.advance(); Some(Token::Semicolon)} Some(b';') => {self.advance(); Ok(Token::Semicolon)},
Some(b',') => {self.advance(); Ok(Token::Comma)},
_ => None Some(b'.') => {
self.advance();
match self.peek() {
Some(b'.') => { self.advance(); Ok(Token::DotDot) },
c => Err(LexError::new(format!(
"unexpected character after '.': {c:?}"
)))
}
},
Some(b'"') => self.string(),
_ => Ok(Token::EOF)
} }
} }
} }

View file

@ -3,6 +3,7 @@ use std::{fs::File, io::Read};
mod token; mod token;
mod expr; mod expr;
mod lexer; mod lexer;
mod error;
use lexer::Lexer; use lexer::Lexer;
mod parser; mod parser;
use parser::Parser; use parser::Parser;
@ -20,7 +21,8 @@ fn main() -> std::io::Result<()> {
let mut lexer = Lexer::new(&source); let mut lexer = Lexer::new(&source);
let mut tokens = Vec::new(); let mut tokens = Vec::new();
while let Some(tok) = lexer.next() { while let Ok(tok) = lexer.next() {
if tok == token::Token::EOF { break; }
print!("{tok:?}, "); print!("{tok:?}, ");
tokens.push(tok); tokens.push(tok);
} }
@ -29,7 +31,7 @@ fn main() -> std::io::Result<()> {
let mut parser = Parser::new(tokens); let mut parser = Parser::new(tokens);
loop { loop {
let parsed = parser.parse(); let parsed = parser.parse().unwrap_or(expr::Expr::EOL);
match parsed { match parsed {
expr::Expr::EOL => break, expr::Expr::EOL => break,
_ => { _ => {

View file

@ -1,5 +1,8 @@
use crate::token::Token; use crate::token::Token;
use crate::expr::{Expr, UnaryOp, BinaryOp, infix_bp, prefix_bp}; use crate::expr::{Expr, UnaryOp, BinaryOp, infix_bp, prefix_bp};
use crate::error::ParseError;
type ParseResult<T> = Result<T, ParseError>;
pub struct Parser { pub struct Parser {
tokens: Vec<Token>, tokens: Vec<Token>,
@ -28,39 +31,49 @@ impl Parser {
tok tok
} }
fn parse_atom(&mut self) -> Expr { fn expect(&mut self, expected: &Token) -> ParseResult<Token> {
match self.next() { match self.next() {
Some(Token::Number(n)) => Expr::Number(n), Some(tok) if &tok == expected => Ok(tok),
Some(Token::Ident(id)) => Expr::Ident(id), other => Err(ParseError::new(format!(
Some(Token::LParen) => { "expected {expected:?}, got {other:?}"
let expr = self.parse_expr(0); )))
match self.next() {
Some(Token::RParen) => expr,
_ => panic!("expected ')'")
}
},
tok => panic!("unknown token: {tok:?}")
} }
} }
fn parse_prefix(&mut self) -> Expr { fn parse_atom(&mut self) -> ParseResult<Expr> {
match self.next() {
Some(Token::Number(n)) => Ok(Expr::Number(n)),
Some(Token::Ident(id)) => Ok(Expr::Ident(id)),
Some(Token::LParen) => {
let expr = self.parse_expr(0)?;
self.expect(&Token::RParen)?;
Ok(expr)
},
tok => Err(ParseError::new(format!(
"unexpected token: {tok:?}"
)))
}
}
fn parse_prefix(&mut self) -> ParseResult<Expr> {
match self.peek() { match self.peek() {
Some(Token::Minus) => { Some(Token::Minus) => {
self.advance(); self.advance();
let op = UnaryOp::Neg; let op = UnaryOp::Neg;
let bp = prefix_bp(&op); let bp = prefix_bp(&op);
let rhs = self.parse_expr(bp); let rhs = self.parse_expr(bp)?;
Expr::Unary { op, right: Box::new(rhs) } Ok(Expr::Unary { op, right: Box::new(rhs) })
}, },
_ => self.parse_atom() _ => self.parse_atom()
} }
} }
fn parse_expr(&mut self, min_bp: u8) -> Expr { fn parse_expr(&mut self, min_bp: u8) -> ParseResult<Expr> {
let mut lhs = self.parse_prefix(); let mut lhs = self.parse_prefix()?;
loop { loop {
let op = match self.peek() { let op = match self.peek() {
@ -81,16 +94,16 @@ impl Parser {
self.advance(); self.advance();
let rhs = self.parse_expr(right_bp); let rhs = self.parse_expr(right_bp)?;
lhs = Expr::Binary { left: Box::new(lhs), op, right: Box::new(rhs) } lhs = Expr::Binary { left: Box::new(lhs), op, right: Box::new(rhs) }
} }
lhs Ok(lhs)
} }
/// Parses one expression from the token stream.
///
/// Returns `Expr::EOL` when no tokens remain; otherwise parses a full
/// expression starting from a minimum binding power of 0 and propagates any
/// `ParseError` it produces.
pub fn parse(&mut self) -> ParseResult<Expr> {
    if self.peek().is_some() {
        self.parse_expr(0)
    } else {
        Ok(Expr::EOL)
    }
}
} }

View file

@ -2,6 +2,7 @@
pub enum Token { pub enum Token {
Number(i64), Number(i64),
Ident(String), Ident(String),
StringLit(String),
// Operators // Operators
Plus, Plus,
@ -19,5 +20,33 @@ pub enum Token {
LBracket, LBracket,
RBracket, RBracket,
Semicolon // Separators
Semicolon,
Comma,
// Keywords
Mov,
If,
Else,
Loop,
While,
For,
In,
Fun,
Call,
Break,
Continue,
True,
False,
DotDot,
// Conditionals
Eq,
Neq,
Lt,
Gt,
Lte,
Gte,
EOF
} }