Compare commits
2 commits
a887bf4b3e
...
a21b119e58
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a21b119e58 | ||
|
|
8aeb6e543e |
5 changed files with 188 additions and 46 deletions
41
src/error.rs
Normal file
41
src/error.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
use std::{error::Error, fmt::Display};
|
||||
|
||||
/// Error carrying a human-readable description of a parsing failure.
///
/// The `Display` output is the stored message prefixed with `parse error: `.
/// `Clone`, `PartialEq` and `Eq` are derived so errors can be duplicated and
/// compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    message: String,
}

impl ParseError {
    /// Creates a new error from anything convertible into a `String`.
    pub fn new(message: impl Into<String>) -> Self {
        Self { message: message.into() }
    }
}

impl Display for ParseError {
    /// Writes `parse error: <message>` to the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "parse error: {}", self.message)
    }
}

impl Error for ParseError {}
|
||||
|
||||
|
||||
|
||||
/// Error carrying a human-readable description of a lexing failure.
///
/// The `Display` output is the stored message prefixed with `lex error: `.
/// `Clone`, `PartialEq` and `Eq` are derived so errors can be duplicated and
/// compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    message: String,
}

impl LexError {
    /// Creates a new error from anything convertible into a `String`.
    pub fn new(message: impl Into<String>) -> Self {
        Self { message: message.into() }
    }
}

impl Display for LexError {
    /// Writes `lex error: <message>` to the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "lex error: {}", self.message)
    }
}

impl Error for LexError {}
|
||||
101
src/lexer.rs
101
src/lexer.rs
|
|
@ -1,4 +1,6 @@
|
|||
use crate::token::Token;
|
||||
use crate::{error::LexError, token::Token};
|
||||
|
||||
/// Shorthand for the result of a lexing step: `T` on success, `LexError` on failure.
type LexResult<T> = Result<T, LexError>;
|
||||
|
||||
pub struct Lexer {
|
||||
source: Vec<u8>,
|
||||
|
|
@ -27,7 +29,7 @@ impl Lexer {
|
|||
self.source.get(self.pos).copied()
|
||||
}
|
||||
|
||||
fn number(&mut self) -> Option<Token> {
|
||||
fn number(&mut self) -> LexResult<Token> {
|
||||
let mut num = String::new();
|
||||
|
||||
while matches!(self.peek(), Some(b'0' ..= b'9')) {
|
||||
|
|
@ -35,42 +37,97 @@ impl Lexer {
|
|||
self.advance();
|
||||
}
|
||||
|
||||
Some(Token::Number(num.parse().unwrap()))
|
||||
Ok(Token::Number(num.parse().unwrap()))
|
||||
}
|
||||
|
||||
fn ident(&mut self) -> Option<Token> {
|
||||
fn ident(&mut self) -> LexResult<Token> {
|
||||
let mut ident = String::new();
|
||||
|
||||
while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z')) {
|
||||
while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z' | b'0' ..= b'9' | b'_')) {
|
||||
ident.push(self.peek().unwrap() as char);
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Some(Token::Ident(ident))
|
||||
let tok = match ident.as_str() {
|
||||
"mov" => Token::Mov,
|
||||
"if" => Token::If,
|
||||
"else" => Token::Else,
|
||||
"loop" => Token::Loop,
|
||||
"while" => Token::While,
|
||||
"for" => Token::For,
|
||||
"in" => Token::In,
|
||||
"fun" => Token::Fun,
|
||||
"call" => Token::Call,
|
||||
"break" => Token::Break,
|
||||
"continue" => Token::Continue,
|
||||
"true" => Token::True,
|
||||
"false" => Token::False,
|
||||
|
||||
"eq" => Token::Eq,
|
||||
"new" => Token::Neq,
|
||||
"lt" => Token::Lt,
|
||||
"gt" => Token::Gt,
|
||||
"lte" => Token::Lte,
|
||||
"gte" => Token::Gte,
|
||||
|
||||
_ => Token::Ident(ident)
|
||||
};
|
||||
|
||||
Ok(tok)
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Option<Token> {
|
||||
fn string(&mut self) -> LexResult<Token> {
|
||||
self.advance();
|
||||
let mut s = String::new();
|
||||
|
||||
loop {
|
||||
match self.peek() {
|
||||
Some(b'"') => { self.advance(); break; },
|
||||
Some(c) => { s.push(c as char); self.advance(); }
|
||||
None => { return Err(LexError::new(format!(
|
||||
"expected closing \""
|
||||
))); }
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Token::StringLit(s))
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> LexResult<Token> {
|
||||
self.skip_whitespace();
|
||||
match self.peek() {
|
||||
Some(b'0' ..= b'9') => self.number(),
|
||||
Some(b'a' ..= b'z' | b'A' ..= b'Z') => self.ident(),
|
||||
|
||||
Some(b'+') => {self.advance(); Some(Token::Plus)},
|
||||
Some(b'-') => {self.advance(); Some(Token::Minus)},
|
||||
Some(b'*') => {self.advance(); Some(Token::Asterisk)},
|
||||
Some(b'/') => {self.advance(); Some(Token::Slash)},
|
||||
Some(b'%') => {self.advance(); Some(Token::Percent)},
|
||||
Some(b'^') => {self.advance(); Some(Token::Caret)},
|
||||
Some(b'+') => {self.advance(); Ok(Token::Plus)},
|
||||
Some(b'-') => {self.advance(); Ok(Token::Minus)},
|
||||
Some(b'*') => {self.advance(); Ok(Token::Asterisk)},
|
||||
Some(b'/') => {self.advance(); Ok(Token::Slash)},
|
||||
Some(b'%') => {self.advance(); Ok(Token::Percent)},
|
||||
Some(b'^') => {self.advance(); Ok(Token::Caret)},
|
||||
|
||||
Some(b'(') => {self.advance(); Some(Token::LParen)},
|
||||
Some(b')') => {self.advance(); Some(Token::RParen)},
|
||||
Some(b'[') => {self.advance(); Some(Token::LBracket)},
|
||||
Some(b']') => {self.advance(); Some(Token::RBracket)},
|
||||
Some(b'{') => {self.advance(); Some(Token::LBrace)},
|
||||
Some(b'}') => {self.advance(); Some(Token::RBrace)},
|
||||
Some(b'(') => {self.advance(); Ok(Token::LParen)},
|
||||
Some(b')') => {self.advance(); Ok(Token::RParen)},
|
||||
Some(b'[') => {self.advance(); Ok(Token::LBracket)},
|
||||
Some(b']') => {self.advance(); Ok(Token::RBracket)},
|
||||
Some(b'{') => {self.advance(); Ok(Token::LBrace)},
|
||||
Some(b'}') => {self.advance(); Ok(Token::RBrace)},
|
||||
|
||||
Some(b';') => {self.advance(); Some(Token::Semicolon)}
|
||||
|
||||
_ => None
|
||||
Some(b';') => {self.advance(); Ok(Token::Semicolon)},
|
||||
Some(b',') => {self.advance(); Ok(Token::Comma)},
|
||||
Some(b'.') => {
|
||||
self.advance();
|
||||
|
||||
match self.peek() {
|
||||
Some(b'.') => { self.advance(); Ok(Token::DotDot) },
|
||||
c => Err(LexError::new(format!(
|
||||
"unexpected character after '.': {c:?}"
|
||||
)))
|
||||
}
|
||||
},
|
||||
Some(b'"') => self.string(),
|
||||
|
||||
_ => Ok(Token::EOF)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ use std::{fs::File, io::Read};
|
|||
mod token;
|
||||
mod expr;
|
||||
mod lexer;
|
||||
mod error;
|
||||
use lexer::Lexer;
|
||||
mod parser;
|
||||
use parser::Parser;
|
||||
|
|
@ -20,7 +21,8 @@ fn main() -> std::io::Result<()> {
|
|||
let mut lexer = Lexer::new(&source);
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
while let Some(tok) = lexer.next() {
|
||||
while let Ok(tok) = lexer.next() {
|
||||
if tok == token::Token::EOF { break; }
|
||||
print!("{tok:?}, ");
|
||||
tokens.push(tok);
|
||||
}
|
||||
|
|
@ -29,7 +31,7 @@ fn main() -> std::io::Result<()> {
|
|||
|
||||
let mut parser = Parser::new(tokens);
|
||||
loop {
|
||||
let parsed = parser.parse();
|
||||
let parsed = parser.parse().unwrap_or(expr::Expr::EOL);
|
||||
match parsed {
|
||||
expr::Expr::EOL => break,
|
||||
_ => {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
use crate::token::Token;
|
||||
use crate::expr::{Expr, UnaryOp, BinaryOp, infix_bp, prefix_bp};
|
||||
use crate::error::ParseError;
|
||||
|
||||
/// Shorthand for the result of a parsing step: `T` on success, `ParseError` on failure.
type ParseResult<T> = Result<T, ParseError>;
|
||||
|
||||
pub struct Parser {
|
||||
tokens: Vec<Token>,
|
||||
|
|
@ -28,39 +31,49 @@ impl Parser {
|
|||
tok
|
||||
}
|
||||
|
||||
fn parse_atom(&mut self) -> Expr {
|
||||
fn expect(&mut self, expected: &Token) -> ParseResult<Token> {
|
||||
match self.next() {
|
||||
Some(Token::Number(n)) => Expr::Number(n),
|
||||
Some(Token::Ident(id)) => Expr::Ident(id),
|
||||
Some(Token::LParen) => {
|
||||
let expr = self.parse_expr(0);
|
||||
|
||||
match self.next() {
|
||||
Some(Token::RParen) => expr,
|
||||
_ => panic!("expected ')'")
|
||||
}
|
||||
},
|
||||
tok => panic!("unknown token: {tok:?}")
|
||||
Some(tok) if &tok == expected => Ok(tok),
|
||||
other => Err(ParseError::new(format!(
|
||||
"expected {expected:?}, got {other:?}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_prefix(&mut self) -> Expr {
|
||||
fn parse_atom(&mut self) -> ParseResult<Expr> {
|
||||
match self.next() {
|
||||
Some(Token::Number(n)) => Ok(Expr::Number(n)),
|
||||
Some(Token::Ident(id)) => Ok(Expr::Ident(id)),
|
||||
Some(Token::LParen) => {
|
||||
let expr = self.parse_expr(0)?;
|
||||
|
||||
self.expect(&Token::RParen)?;
|
||||
|
||||
Ok(expr)
|
||||
},
|
||||
tok => Err(ParseError::new(format!(
|
||||
"unexpected token: {tok:?}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_prefix(&mut self) -> ParseResult<Expr> {
|
||||
match self.peek() {
|
||||
Some(Token::Minus) => {
|
||||
self.advance();
|
||||
let op = UnaryOp::Neg;
|
||||
|
||||
let bp = prefix_bp(&op);
|
||||
let rhs = self.parse_expr(bp);
|
||||
let rhs = self.parse_expr(bp)?;
|
||||
|
||||
Expr::Unary { op, right: Box::new(rhs) }
|
||||
Ok(Expr::Unary { op, right: Box::new(rhs) })
|
||||
},
|
||||
_ => self.parse_atom()
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_expr(&mut self, min_bp: u8) -> Expr {
|
||||
let mut lhs = self.parse_prefix();
|
||||
fn parse_expr(&mut self, min_bp: u8) -> ParseResult<Expr> {
|
||||
let mut lhs = self.parse_prefix()?;
|
||||
|
||||
loop {
|
||||
let op = match self.peek() {
|
||||
|
|
@ -81,16 +94,16 @@ impl Parser {
|
|||
|
||||
self.advance();
|
||||
|
||||
let rhs = self.parse_expr(right_bp);
|
||||
let rhs = self.parse_expr(right_bp)?;
|
||||
|
||||
lhs = Expr::Binary { left: Box::new(lhs), op, right: Box::new(rhs) }
|
||||
}
|
||||
|
||||
lhs
|
||||
Ok(lhs)
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) -> Expr {
|
||||
if self.peek().is_none() {return Expr::EOL}
|
||||
pub fn parse(&mut self) -> ParseResult<Expr> {
|
||||
if self.peek().is_none() {return Ok(Expr::EOL)}
|
||||
self.parse_expr(0)
|
||||
}
|
||||
}
|
||||
31
src/token.rs
31
src/token.rs
|
|
@ -2,6 +2,7 @@
|
|||
pub enum Token {
|
||||
Number(i64),
|
||||
Ident(String),
|
||||
StringLit(String),
|
||||
|
||||
// Operators
|
||||
Plus,
|
||||
|
|
@ -19,5 +20,33 @@ pub enum Token {
|
|||
LBracket,
|
||||
RBracket,
|
||||
|
||||
Semicolon
|
||||
// Separators
|
||||
Semicolon,
|
||||
Comma,
|
||||
|
||||
// Keywords
|
||||
Mov,
|
||||
If,
|
||||
Else,
|
||||
Loop,
|
||||
While,
|
||||
For,
|
||||
In,
|
||||
Fun,
|
||||
Call,
|
||||
Break,
|
||||
Continue,
|
||||
True,
|
||||
False,
|
||||
DotDot,
|
||||
|
||||
// Conditionals
|
||||
Eq,
|
||||
Neq,
|
||||
Lt,
|
||||
Gt,
|
||||
Lte,
|
||||
Gte,
|
||||
|
||||
EOF
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue