added keywords, LexError
This commit is contained in:
parent
8aeb6e543e
commit
a21b119e58
4 changed files with 133 additions and 25 deletions
23
src/error.rs
23
src/error.rs
|
|
@ -17,4 +17,25 @@ impl Display for ParseError {
|
|||
}
|
||||
}
|
||||
|
||||
impl Error for ParseError {}
|
||||
impl Error for ParseError {}
|
||||
|
||||
|
||||
|
||||
/// Error produced by the lexer when the input cannot be turned into tokens.
///
/// Carries a human-readable description only. The `Display` implementation
/// renders it as `lex error: <message>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Description of what went wrong, without the `lex error: ` prefix.
    message: String,
}

impl LexError {
    /// Creates an error from anything convertible into a `String`,
    /// e.g. a string literal or the result of `format!`.
    pub fn new(message: impl Into<String>) -> Self {
        Self { message: message.into() }
    }

    /// Returns the bare description, without the `lex error: ` prefix that
    /// `Display` adds.
    pub fn message(&self) -> &str {
        &self.message
    }
}

impl Display for LexError {
    /// Writes `lex error: ` followed by the stored message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "lex error: {}", self.message)
    }
}

// No source error is wrapped, so the default `Error` methods are sufficient.
impl Error for LexError {}
|
||||
101
src/lexer.rs
101
src/lexer.rs
|
|
@ -1,4 +1,6 @@
|
|||
use crate::token::Token;
|
||||
use crate::{error::LexError, token::Token};
|
||||
|
||||
type LexResult<T> = Result<T, LexError>;
|
||||
|
||||
pub struct Lexer {
|
||||
source: Vec<u8>,
|
||||
|
|
@ -27,7 +29,7 @@ impl Lexer {
|
|||
self.source.get(self.pos).copied()
|
||||
}
|
||||
|
||||
fn number(&mut self) -> Option<Token> {
|
||||
fn number(&mut self) -> LexResult<Token> {
|
||||
let mut num = String::new();
|
||||
|
||||
while matches!(self.peek(), Some(b'0' ..= b'9')) {
|
||||
|
|
@ -35,42 +37,97 @@ impl Lexer {
|
|||
self.advance();
|
||||
}
|
||||
|
||||
Some(Token::Number(num.parse().unwrap()))
|
||||
Ok(Token::Number(num.parse().unwrap()))
|
||||
}
|
||||
|
||||
fn ident(&mut self) -> Option<Token> {
|
||||
fn ident(&mut self) -> LexResult<Token> {
|
||||
let mut ident = String::new();
|
||||
|
||||
while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z')) {
|
||||
while matches!(self.peek(), Some(b'a' ..= b'z' | b'A' ..= b'Z' | b'0' ..= b'9' | b'_')) {
|
||||
ident.push(self.peek().unwrap() as char);
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Some(Token::Ident(ident))
|
||||
let tok = match ident.as_str() {
|
||||
"mov" => Token::Mov,
|
||||
"if" => Token::If,
|
||||
"else" => Token::Else,
|
||||
"loop" => Token::Loop,
|
||||
"while" => Token::While,
|
||||
"for" => Token::For,
|
||||
"in" => Token::In,
|
||||
"fun" => Token::Fun,
|
||||
"call" => Token::Call,
|
||||
"break" => Token::Break,
|
||||
"continue" => Token::Continue,
|
||||
"true" => Token::True,
|
||||
"false" => Token::False,
|
||||
|
||||
"eq" => Token::Eq,
|
||||
"new" => Token::Neq,
|
||||
"lt" => Token::Lt,
|
||||
"gt" => Token::Gt,
|
||||
"lte" => Token::Lte,
|
||||
"gte" => Token::Gte,
|
||||
|
||||
_ => Token::Ident(ident)
|
||||
};
|
||||
|
||||
Ok(tok)
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Option<Token> {
|
||||
fn string(&mut self) -> LexResult<Token> {
|
||||
self.advance();
|
||||
let mut s = String::new();
|
||||
|
||||
loop {
|
||||
match self.peek() {
|
||||
Some(b'"') => { self.advance(); break; },
|
||||
Some(c) => { s.push(c as char); self.advance(); }
|
||||
None => { return Err(LexError::new(format!(
|
||||
"expected closing \""
|
||||
))); }
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Token::StringLit(s))
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> LexResult<Token> {
|
||||
self.skip_whitespace();
|
||||
match self.peek() {
|
||||
Some(b'0' ..= b'9') => self.number(),
|
||||
Some(b'a' ..= b'z' | b'A' ..= b'Z') => self.ident(),
|
||||
|
||||
Some(b'+') => {self.advance(); Some(Token::Plus)},
|
||||
Some(b'-') => {self.advance(); Some(Token::Minus)},
|
||||
Some(b'*') => {self.advance(); Some(Token::Asterisk)},
|
||||
Some(b'/') => {self.advance(); Some(Token::Slash)},
|
||||
Some(b'%') => {self.advance(); Some(Token::Percent)},
|
||||
Some(b'^') => {self.advance(); Some(Token::Caret)},
|
||||
Some(b'+') => {self.advance(); Ok(Token::Plus)},
|
||||
Some(b'-') => {self.advance(); Ok(Token::Minus)},
|
||||
Some(b'*') => {self.advance(); Ok(Token::Asterisk)},
|
||||
Some(b'/') => {self.advance(); Ok(Token::Slash)},
|
||||
Some(b'%') => {self.advance(); Ok(Token::Percent)},
|
||||
Some(b'^') => {self.advance(); Ok(Token::Caret)},
|
||||
|
||||
Some(b'(') => {self.advance(); Some(Token::LParen)},
|
||||
Some(b')') => {self.advance(); Some(Token::RParen)},
|
||||
Some(b'[') => {self.advance(); Some(Token::LBracket)},
|
||||
Some(b']') => {self.advance(); Some(Token::RBracket)},
|
||||
Some(b'{') => {self.advance(); Some(Token::LBrace)},
|
||||
Some(b'}') => {self.advance(); Some(Token::RBrace)},
|
||||
Some(b'(') => {self.advance(); Ok(Token::LParen)},
|
||||
Some(b')') => {self.advance(); Ok(Token::RParen)},
|
||||
Some(b'[') => {self.advance(); Ok(Token::LBracket)},
|
||||
Some(b']') => {self.advance(); Ok(Token::RBracket)},
|
||||
Some(b'{') => {self.advance(); Ok(Token::LBrace)},
|
||||
Some(b'}') => {self.advance(); Ok(Token::RBrace)},
|
||||
|
||||
Some(b';') => {self.advance(); Some(Token::Semicolon)}
|
||||
|
||||
_ => None
|
||||
Some(b';') => {self.advance(); Ok(Token::Semicolon)},
|
||||
Some(b',') => {self.advance(); Ok(Token::Comma)},
|
||||
Some(b'.') => {
|
||||
self.advance();
|
||||
|
||||
match self.peek() {
|
||||
Some(b'.') => { self.advance(); Ok(Token::DotDot) },
|
||||
c => Err(LexError::new(format!(
|
||||
"unexpected character after '.': {c:?}"
|
||||
)))
|
||||
}
|
||||
},
|
||||
Some(b'"') => self.string(),
|
||||
|
||||
_ => Ok(Token::EOF)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -21,7 +21,8 @@ fn main() -> std::io::Result<()> {
|
|||
let mut lexer = Lexer::new(&source);
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
while let Some(tok) = lexer.next() {
|
||||
while let Ok(tok) = lexer.next() {
|
||||
if tok == token::Token::EOF { break; }
|
||||
print!("{tok:?}, ");
|
||||
tokens.push(tok);
|
||||
}
|
||||
|
|
|
|||
31
src/token.rs
31
src/token.rs
|
|
@ -2,6 +2,7 @@
|
|||
pub enum Token {
|
||||
Number(i64),
|
||||
Ident(String),
|
||||
StringLit(String),
|
||||
|
||||
// Operators
|
||||
Plus,
|
||||
|
|
@ -19,5 +20,33 @@ pub enum Token {
|
|||
LBracket,
|
||||
RBracket,
|
||||
|
||||
Semicolon
|
||||
// Separators
|
||||
Semicolon,
|
||||
Comma,
|
||||
|
||||
// Keywords
|
||||
Mov,
|
||||
If,
|
||||
Else,
|
||||
Loop,
|
||||
While,
|
||||
For,
|
||||
In,
|
||||
Fun,
|
||||
Call,
|
||||
Break,
|
||||
Continue,
|
||||
True,
|
||||
False,
|
||||
DotDot,
|
||||
|
||||
// Conditionals
|
||||
Eq,
|
||||
Neq,
|
||||
Lt,
|
||||
Gt,
|
||||
Lte,
|
||||
Gte,
|
||||
|
||||
EOF
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue