From ad7a8ad8793019ad93cee971187cc7d0f0cca462 Mon Sep 17 00:00:00 2001
From: Raptorox <70806316+Raptorox@users.noreply.github.com>
Date: Wed, 20 May 2026 21:04:14 +0200
Subject: [PATCH] add lexer

---
Review notes for this revision (ignored by git-am):
  * Lexer::ident now consumes the bytes it reads; it never advanced before,
    so any identifier made the lexer loop forever.
  * Lexer::number no longer panics on literals that overflow i64.
  * Lexer implements Iterator; `lexer.next()` keeps working unchanged.
  * main reports a missing path argument instead of panicking on args[1].
  * The blob ids on the `index` lines are those of the original revision.

 .gitignore   |   1 +
 src/lexer.rs | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs  |  21 ++++++++-
 3 files changed, 147 insertions(+), 2 deletions(-)
 create mode 100644 src/lexer.rs

diff --git a/.gitignore b/.gitignore
index ea8c4bf..767ad53 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /target
+/res
\ No newline at end of file
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..d49eafd
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,127 @@
+//! Byte-oriented lexer that turns source text into a stream of [`Token`]s.
+
+/// A single lexical token.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Token {
+    /// Decimal integer literal, e.g. `42`.
+    Number(i64),
+    /// Run of ASCII letters, e.g. `foo`.
+    Ident(String),
+
+    // Operators
+    Add,
+    Sub,
+    Mul,
+    Div,
+    Mod,
+
+    // Parentheses
+    LParen,
+    RParen,
+    LBrace,
+    RBrace,
+    LBracket,
+    RBracket,
+
+    Semicolon,
+}
+
+/// Cursor over the bytes of a source string; iterate it to get [`Token`]s.
+pub struct Lexer {
+    source: Vec<u8>,
+    pos: usize,
+}
+
+impl Lexer {
+    /// Creates a lexer positioned at the first byte of `source`.
+    pub fn new(source: &str) -> Self {
+        Self {
+            source: source.as_bytes().to_vec(),
+            pos: 0,
+        }
+    }
+
+    /// Returns the byte under the cursor without consuming it.
+    fn peek(&self) -> Option<u8> {
+        self.source.get(self.pos).copied()
+    }
+
+    /// Moves the cursor one byte forward.
+    fn advance(&mut self) {
+        self.pos += 1;
+    }
+
+    /// Consumes bytes while `pred` accepts them and returns them as text.
+    ///
+    /// Only used with ASCII predicates, so every byte is exactly one `char`.
+    fn take_while(&mut self, pred: impl Fn(u8) -> bool) -> String {
+        let start = self.pos;
+        while matches!(self.peek(), Some(b) if pred(b)) {
+            self.advance();
+        }
+        self.source[start..self.pos].iter().copied().map(char::from).collect()
+    }
+
+    /// Skips ASCII whitespace in front of the next token.
+    fn skip_whitespace(&mut self) {
+        while matches!(self.peek(), Some(c) if c.is_ascii_whitespace()) {
+            self.advance();
+        }
+    }
+
+    /// Lexes a run of decimal digits into [`Token::Number`].
+    ///
+    /// Returns `None` when the literal does not fit in an `i64`.
+    fn number(&mut self) -> Option<Token> {
+        let start = self.pos;
+        let digits = self.take_while(|b| b.is_ascii_digit());
+        match digits.parse::<i64>() {
+            Ok(value) => Some(Token::Number(value)),
+            Err(_) => {
+                // Rewind so the lexer stays parked on the bad literal, the
+                // same way it stays parked on any other unlexable byte.
+                self.pos = start;
+                None
+            }
+        }
+    }
+
+    /// Lexes a run of ASCII letters into [`Token::Ident`].
+    fn ident(&mut self) -> Token {
+        Token::Ident(self.take_while(|b| b.is_ascii_alphabetic()))
+    }
+}
+
+impl Iterator for Lexer {
+    type Item = Token;
+
+    /// Returns the next token, or `None` at end of input or at the first
+    /// byte (or oversized number) that cannot be lexed.
+    fn next(&mut self) -> Option<Token> {
+        self.skip_whitespace();
+        let token = match self.peek()? {
+            b'0'..=b'9' => return self.number(),
+            b'a'..=b'z' | b'A'..=b'Z' => return Some(self.ident()),
+
+            b'+' => Token::Add,
+            b'-' => Token::Sub,
+            b'*' => Token::Mul,
+            b'/' => Token::Div,
+            b'%' => Token::Mod,
+
+            b'(' => Token::LParen,
+            b')' => Token::RParen,
+            b'[' => Token::LBracket,
+            b']' => Token::RBracket,
+            b'{' => Token::LBrace,
+            b'}' => Token::RBrace,
+
+            b';' => Token::Semicolon,
+
+            _ => return None,
+        };
+        // Every single-byte token consumes exactly the byte just matched.
+        self.advance();
+        Some(token)
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index e7a11a9..59ed221 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,20 @@
-fn main() {
-    println!("Hello, world!");
+use std::{env, fs, io};
+
+mod lexer;
+use lexer::Lexer;
+
+fn main() -> io::Result<()> {
+    // Report a missing path argument as an error instead of panicking.
+    let path = env::args().nth(1).ok_or_else(|| {
+        io::Error::new(io::ErrorKind::InvalidInput, "missing source file argument")
+    })?;
+    let source = fs::read_to_string(path)?;
+
+    println!("{:?}", source);
+
+    for tok in Lexer::new(&source) {
+        print!("{tok:?}, ");
+    }
+
+    Ok(())
 }