diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..a824679 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,41 @@ +use std::{error::Error, fmt::Display}; + +#[derive(Debug)] +pub struct ParseError { + message: String +} + +impl ParseError { + pub fn new(message: impl Into<String>) -> Self { + Self { message: message.into() } + } +} + +impl Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "parse error: {}", self.message) + } +} + +impl Error for ParseError {} + + + +#[derive(Debug)] +pub struct LexError { + message: String +} + +impl LexError { + pub fn new(message: impl Into<String>) -> Self { + Self { message: message.into() } + } +} + +impl Display for LexError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "lex error: {}", self.message) + } +} + +impl Error for LexError {} \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs index 32d806a..f862a54 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,4 +1,6 @@ -use crate::token::Token; +use crate::{error::LexError, token::Token}; + +type LexResult = Result<Token, LexError>; pub struct Lexer { source: Vec<u8>, @@ -27,7 +29,7 @@ impl Lexer { self.source.get(self.pos).copied() } - fn number(&mut self) -> Option<Token> { + fn number(&mut self) -> LexResult { let mut num = String::new(); while matches!(self.peek(), Some(b'0' ..= b'9')) { @@ -35,42 +37,97 @@ impl Lexer { self.advance(); } - Some(Token::Number(num.parse().unwrap())) + num.parse::<i64>().map(Token::Number).map_err(|e| LexError::new(format!("invalid number literal '{num}': {e}"))) } - fn ident(&mut self) -> Option<Token> { + fn ident(&mut self) -> LexResult { let mut ident = String::new(); - while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z')) { + while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z' | b'0' ..= b'9' | b'_')) { ident.push(self.peek().unwrap() as char); + self.advance(); } - Some(Token::Ident(ident)) + let tok = match ident.as_str() { + "mov" => Token::Mov, + "if" => Token::If, + "else" => Token::Else, + "loop" 
=> Token::Loop, + "while" => Token::While, + "for" => Token::For, + "in" => Token::In, + "fun" => Token::Fun, + "call" => Token::Call, + "break" => Token::Break, + "continue" => Token::Continue, + "true" => Token::True, + "false" => Token::False, + + "eq" => Token::Eq, + "neq" => Token::Neq, + "lt" => Token::Lt, + "gt" => Token::Gt, + "lte" => Token::Lte, + "gte" => Token::Gte, + + _ => Token::Ident(ident) + }; + + Ok(tok) } - pub fn next(&mut self) -> Option<Token> { + fn string(&mut self) -> LexResult { + self.advance(); + let mut s = String::new(); + + loop { + match self.peek() { + Some(b'"') => { self.advance(); break; }, + Some(c) => { s.push(c as char); self.advance(); } + None => { return Err(LexError::new(format!( + "expected closing \"" + ))); } + } + } + + Ok(Token::StringLit(s)) + } + + pub fn next(&mut self) -> LexResult { self.skip_whitespace(); match self.peek() { Some(b'0' ..= b'9') => self.number(), Some(b'a' ..= b'z' | b'A' ..= b'Z') => self.ident(), - Some(b'+') => {self.advance(); Some(Token::Plus)}, - Some(b'-') => {self.advance(); Some(Token::Minus)}, - Some(b'*') => {self.advance(); Some(Token::Asterisk)}, - Some(b'/') => {self.advance(); Some(Token::Slash)}, - Some(b'%') => {self.advance(); Some(Token::Percent)}, - Some(b'^') => {self.advance(); Some(Token::Caret)}, + Some(b'+') => {self.advance(); Ok(Token::Plus)}, + Some(b'-') => {self.advance(); Ok(Token::Minus)}, + Some(b'*') => {self.advance(); Ok(Token::Asterisk)}, + Some(b'/') => {self.advance(); Ok(Token::Slash)}, + Some(b'%') => {self.advance(); Ok(Token::Percent)}, + Some(b'^') => {self.advance(); Ok(Token::Caret)}, - Some(b'(') => {self.advance(); Some(Token::LParen)}, - Some(b')') => {self.advance(); Some(Token::RParen)}, - Some(b'[') => {self.advance(); Some(Token::LBracket)}, - Some(b']') => {self.advance(); Some(Token::RBracket)}, - Some(b'{') => {self.advance(); Some(Token::LBrace)}, - Some(b'}') => {self.advance(); Some(Token::RBrace)}, + Some(b'(') => {self.advance(); 
Ok(Token::LParen)}, + Some(b')') => {self.advance(); Ok(Token::RParen)}, + Some(b'[') => {self.advance(); Ok(Token::LBracket)}, + Some(b']') => {self.advance(); Ok(Token::RBracket)}, + Some(b'{') => {self.advance(); Ok(Token::LBrace)}, + Some(b'}') => {self.advance(); Ok(Token::RBrace)}, - Some(b';') => {self.advance(); Some(Token::Semicolon)} - - _ => None + Some(b';') => {self.advance(); Ok(Token::Semicolon)}, + Some(b',') => {self.advance(); Ok(Token::Comma)}, + Some(b'.') => { + self.advance(); + + match self.peek() { + Some(b'.') => { self.advance(); Ok(Token::DotDot) }, + c => Err(LexError::new(format!( + "unexpected character after '.': {c:?}" + ))) + } + }, + Some(b'"') => self.string(), + + _ => Ok(Token::EOF) } } } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index f0cd919..608fb86 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use std::{fs::File, io::Read}; mod token; mod expr; mod lexer; +mod error; use lexer::Lexer; mod parser; use parser::Parser; @@ -20,7 +21,8 @@ fn main() -> std::io::Result<()> { let mut lexer = Lexer::new(&source); let mut tokens = Vec::new(); - while let Some(tok) = lexer.next() { + while let Ok(tok) = lexer.next() { + if tok == token::Token::EOF { break; } print!("{tok:?}, "); tokens.push(tok); } @@ -29,7 +31,7 @@ fn main() -> std::io::Result<()> { let mut parser = Parser::new(tokens); loop { - let parsed = parser.parse(); + let parsed = parser.parse().unwrap_or(expr::Expr::EOL); match parsed { expr::Expr::EOL => break, _ => { diff --git a/src/parser.rs b/src/parser.rs index 3afc6e0..181b4fa 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,5 +1,8 @@ use crate::token::Token; use crate::expr::{Expr, UnaryOp, BinaryOp, infix_bp, prefix_bp}; +use crate::error::ParseError; + +type ParseResult<T> = Result<T, ParseError>; pub struct Parser { tokens: Vec<Token>, @@ -28,39 +31,49 @@ impl Parser { tok } - fn parse_atom(&mut self) -> Expr { + fn expect(&mut self, expected: &Token) -> ParseResult<Token> { match self.next() { - 
Some(Token::Number(n)) => Expr::Number(n), - Some(Token::Ident(id)) => Expr::Ident(id), - Some(Token::LParen) => { - let expr = self.parse_expr(0); - - match self.next() { - Some(Token::RParen) => expr, - _ => panic!("expected ')'") - } - }, - tok => panic!("unknown token: {tok:?}") + Some(tok) if &tok == expected => Ok(tok), + other => Err(ParseError::new(format!( + "expected {expected:?}, got {other:?}" + ))) } } - fn parse_prefix(&mut self) -> Expr { + fn parse_atom(&mut self) -> ParseResult<Expr> { + match self.next() { + Some(Token::Number(n)) => Ok(Expr::Number(n)), + Some(Token::Ident(id)) => Ok(Expr::Ident(id)), + Some(Token::LParen) => { + let expr = self.parse_expr(0)?; + + self.expect(&Token::RParen)?; + + Ok(expr) + }, + tok => Err(ParseError::new(format!( + "unexpected token: {tok:?}" + ))) + } + } + + fn parse_prefix(&mut self) -> ParseResult<Expr> { match self.peek() { Some(Token::Minus) => { self.advance(); let op = UnaryOp::Neg; let bp = prefix_bp(&op); - let rhs = self.parse_expr(bp); + let rhs = self.parse_expr(bp)?; - Expr::Unary { op, right: Box::new(rhs) } + Ok(Expr::Unary { op, right: Box::new(rhs) }) }, _ => self.parse_atom() } } - fn parse_expr(&mut self, min_bp: u8) -> Expr { - let mut lhs = self.parse_prefix(); + fn parse_expr(&mut self, min_bp: u8) -> ParseResult<Expr> { + let mut lhs = self.parse_prefix()?; loop { let op = match self.peek() { @@ -81,16 +94,16 @@ impl Parser { self.advance(); - let rhs = self.parse_expr(right_bp); + let rhs = self.parse_expr(right_bp)?; lhs = Expr::Binary { left: Box::new(lhs), op, right: Box::new(rhs) } } - lhs + Ok(lhs) } - pub fn parse(&mut self) -> Expr { - if self.peek().is_none() {return Expr::EOL} + pub fn parse(&mut self) -> ParseResult<Expr> { + if self.peek().is_none() {return Ok(Expr::EOL)} self.parse_expr(0) } } \ No newline at end of file diff --git a/src/token.rs b/src/token.rs index c193fbe..f54fca0 100644 --- a/src/token.rs +++ b/src/token.rs @@ -2,6 +2,7 @@ pub enum Token { Number(i64), Ident(String), + 
StringLit(String), // Operators Plus, @@ -19,5 +20,33 @@ pub enum Token { LBracket, RBracket, - Semicolon + // Separators + Semicolon, + Comma, + + // Keywords + Mov, + If, + Else, + Loop, + While, + For, + In, + Fun, + Call, + Break, + Continue, + True, + False, + DotDot, + + // Conditionals + Eq, + Neq, + Lt, + Gt, + Lte, + Gte, + + EOF } \ No newline at end of file