From 7e0d4ca69a432ab4f3ec1a7bb88cca12d034e1ca Mon Sep 17 00:00:00 2001 From: ParkerTenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Thu, 18 Dec 2025 22:46:16 -0500 Subject: [PATCH] moved AST into seperate file --- default.nix | 1 - src/ast.rs | 50 ++++++++++++++++++++++ src/lexer.rs | 32 +++++++------- src/lib.rs | 1 + src/main.rs | 2 +- src/parser.rs | 113 +++++++++++++++++++++----------------------------- 6 files changed, 116 insertions(+), 83 deletions(-) create mode 100644 src/ast.rs diff --git a/default.nix b/default.nix index 9de37f8..579ba87 100644 --- a/default.nix +++ b/default.nix @@ -5,7 +5,6 @@ # Replace llvmPackages with llvmPackages_X, where X is the latest LLVM version (at the time of writing, 16) llvmPackages.bintools rustup - nasm ]; RUSTC_VERSION = "nightly"; # https://github.com/rust-lang/rust-bindgen#environment-variables diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..e5c1463 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,50 @@ +use std::ops::Range; + +use crate::lexer::Spanned; + +#[derive(Clone, Debug)] +pub struct Tuple<'a>(pub Vec>>); + +#[derive(Clone, Debug)] +pub enum Symbol<'a> { + Epsilon, + Ident(&'a str), +} + +#[derive(Clone, Debug)] +pub enum Dest<'a> { + Ident(&'a str), + Function(Spanned<&'a str>, Spanned>), +} + +#[derive(Clone, Debug)] +pub enum Item<'a> { + Symbol(Symbol<'a>), + Tuple(Tuple<'a>), + List(List<'a>), +} + +#[derive(Clone, Debug)] +pub enum Regex<'a>{ + Terminal(&'a str), + Match{ + complement: bool, + patterns: Vec> + }, + Concat(Vec>), + Star(Box>), + Plus(Box>), + Union(Vec>), + Intersection(Vec>), + Complement(Box>), +} + +#[derive(Clone, Debug)] +pub struct List<'a>(pub Vec>>); + +#[derive(Clone, Debug)] +pub enum TopLevel<'a> { + Assignment(Spanned>, Spanned>), + ProductionRule(Spanned>, Spanned>), + Table(), +} \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs index 531c918..d0200d7 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -29,21 +29,21 @@ pub enum Token<'a> { impl<'a> std::fmt::Display for Token<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Token::LPar => write!(f, ")"), - Token::RPar => write!(f, "("), - Token::LBrace => write!(f, "{{"), - Token::RBrace => write!(f, "}}"), - Token::LBracket => write!(f, "["), - Token::RBracket => write!(f, "]"), - Token::Tilde => write!(f, "~"), - Token::Eq => write!(f, "="), - Token::Comma => write!(f, ","), - Token::Or => write!(f, "|"), - Token::Plus => write!(f, "+"), - Token::Star => write!(f, "*"), - Token::And => write!(f, "&"), - Token::LSmallArrow => write!(f, "->"), - Token::LBigArrow => write!(f, "=>"), + Token::LPar => write!(f, "')'"), + Token::RPar => write!(f, "'('"), + Token::LBrace => write!(f, "'{{'"), + Token::RBrace => write!(f, "'}}'"), + Token::LBracket => write!(f, "'['"), + Token::RBracket => write!(f, "']'"), + Token::Tilde => write!(f, "'~'"), + Token::Eq => write!(f, "'='"), + Token::Comma => write!(f, "','"), + Token::Or => write!(f, "'|'"), + Token::Plus => write!(f, "'+'"), + Token::Star => write!(f, "'*'"), + Token::And => write!(f, "'&'"), + Token::LSmallArrow => write!(f, "'->'"), + Token::LBigArrow => write!(f, "'=>'"), Token::Comment(_) => write!(f, ""), Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"), Token::Ident(_) => write!(f, "ident"), @@ -126,7 +126,7 @@ impl<'a> std::iter::Iterator for Lexer<'a> { let res = match self.consume()? { '(' => Ok(Token::LPar), - ')' => Ok(Token::LPar), + ')' => Ok(Token::RPar), '{' => Ok(Token::LBrace), '}' => Ok(Token::RBrace), '[' => Ok(Token::LBracket), diff --git a/src/lib.rs b/src/lib.rs index 8c16373..5a6efaf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod dfa; pub mod lexer; pub mod parser; +pub mod ast; pub struct SymbolMap([T; 256]); diff --git a/src/main.rs b/src/main.rs index db420e4..da8c6f9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,6 @@ use automata::{lexer::Lexer, parser::Parser}; fn main() { let input = include_str!("../example.txt"); - + println!("{:#?}", Parser::new(Lexer::new(input)).parse_elements()); } diff --git a/src/parser.rs b/src/parser.rs index 54c8aef..c879739 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,37 +1,8 @@ -use std::iter::Peekable; - +use std::{iter::Peekable}; +use crate::ast::*; use crate::lexer::{Lexer, Span, Spanned, Token}; -#[derive(Clone, Debug)] -pub struct Tuple<'a>(pub Vec>>); -#[derive(Clone, Debug)] -pub enum Symbol<'a> { - Epsilon, - Ident(&'a str), -} - -#[derive(Clone, Debug)] -pub enum Dest<'a> { - Ident(&'a str), - Function(Spanned<&'a str>, Spanned>), -} - -#[derive(Clone, Debug)] -pub enum Item<'a> { - Symbol(Symbol<'a>), - Tuple(Tuple<'a>), - List(List<'a>) -} - -#[derive(Clone, Debug)] -pub struct List<'a>(pub Vec>>); - -#[derive(Clone, Debug)] -pub enum TopLevel<'a> { - Assignment(Spanned>, Spanned>), - Table(), -} pub enum LogKind { Lexer, @@ -58,8 +29,12 @@ pub struct Parser<'a> { } impl<'a> Parser<'a> { - pub fn new(lexer: Lexer<'a>) -> Self{ - Parser { eof: lexer.eof_span(), lexer: lexer.peekable(), log: Vec::new() } + pub fn new(lexer: Lexer<'a>) -> Self { + Parser { + eof: lexer.eof_span(), + lexer: lexer.peekable(), + log: Vec::new(), + } } fn next_token(&mut self) -> Option>> { @@ -80,7 +55,6 @@ impl<'a> Parser<'a> { fn peek_token(&mut self) -> Option>> { loop { match *self.lexer.peek()? { - // not a heavy clone but because of range Spanned(Ok(ok), r) => return Some(Spanned(ok, r)), Spanned(Err(err), r) => self.log.push(Log { message: format!("{err:?}"), @@ -92,7 +66,7 @@ impl<'a> Parser<'a> { } } - fn expect_token(&mut self, expected: Token<'a>) -> (bool, Span) { + fn expect_token(&mut self, expected: Token<'a>) -> (bool, Span) { if let Some(Spanned(token, range)) = self.next_token() { if token != expected { self.log.push(Log { @@ -102,7 +76,7 @@ impl<'a> Parser<'a> { kind: LogKind::Lexer, }); (false, range) - }else{ + } else { (true, range) } } else { @@ -155,8 +129,8 @@ impl<'a> Parser<'a> { pub fn parse_tupple(&mut self) -> Spanned> { let mut items = Vec::new(); let (matched, start) = self.expect_token(Token::LPar); - if !matched{ - return Spanned(Tuple(Vec::new()), start) + if !matched { + return Spanned(Tuple(Vec::new()), start); } while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) { @@ -164,12 +138,9 @@ impl<'a> Parser<'a> { if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) { self.next_token(); } - if self.peek_token().is_none(){ + if self.peek_token().is_none() { self.log.push(Log { - message: format!( - "unexpected eof expected {:}", - Token::RPar - ), + message: format!("unexpected eof expected {:}", Token::RPar), range: self.eof, level: LogLevel::Error, kind: LogKind::Lexer, @@ -183,9 +154,11 @@ impl<'a> Parser<'a> { Spanned(Tuple(items), start.join(end)) } - pub fn parse_item(&mut self) -> Spanned>{ - match self.peek_token(){ - Some(Spanned(Token::Ident(_)|Token::Tilde, _)) => self.parse_symbol().map(Item::Symbol), + pub fn parse_item(&mut self) -> Spanned> { + match self.peek_token() { + Some(Spanned(Token::Ident(_) | Token::Tilde, _)) => { + self.parse_symbol().map(Item::Symbol) + } Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple), Some(Spanned(Token::LBrace, _)) => self.parse_list().map(Item::List), Some(Spanned(got, r)) => { @@ -222,11 +195,11 @@ impl<'a> Parser<'a> { } } - pub fn parse_list(&mut self) -> Spanned>{ + pub fn parse_list(&mut self) -> Spanned> { let mut list = Vec::new(); let (matched, start) = self.expect_token(Token::LBrace); - if !matched{ - return Spanned(List(Vec::new()), start) + if !matched { + return Spanned(List(Vec::new()), start); } while !matches!(self.peek_token(), Some(Spanned(Token::RBrace, _))) { @@ -234,12 +207,9 @@ impl<'a> Parser<'a> { if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) { self.next_token(); } - if self.peek_token().is_none(){ + if self.peek_token().is_none() { self.log.push(Log { - message: format!( - "unexpected eof expected {:}", - Token::RBrace - ), + message: format!("unexpected eof expected {:}", Token::RBrace), range: self.eof, level: LogLevel::Error, kind: LogKind::Lexer, @@ -251,28 +221,41 @@ impl<'a> Parser<'a> { Spanned(List(list), start.join(end)) } + pub fn parse_regex(&mut self) -> Spanned>{ + todo!() + } + pub fn parse_elements(&mut self) -> Vec>> { let mut result = Vec::new(); loop { let Some(next) = self.next_token() else { break }; - match next { - Spanned(Token::Ident(ident), ident_range) => { - let dest @ Spanned(_, start) = if matches!(self.peek_token(), Some(Spanned(Token::LPar, _))) { - let tuple = self.parse_tupple(); - let span = ident_range.join(tuple.1); - Spanned(Dest::Function(Spanned(ident, ident_range), tuple), span) - } else { - Spanned(Dest::Ident(ident), ident_range) - }; + match (next, self.peek_token()) { + (Spanned(Token::Ident(ident), start), Some(Spanned(Token::LPar, _))) => { + let tuple = self.parse_tupple(); + let span = start.join(tuple.1); + let dest = Spanned(Dest::Function(Spanned(ident, start), tuple), span); + self.expect_token(Token::Eq); + let item = self.parse_item(); + let span = start.join(item.1); + result.push(Spanned(TopLevel::Assignment(dest, item), span)); + } + (Spanned(Token::Ident(_), _), Some(Spanned(Token::LSmallArrow|Token::Ident(_), _))) => { + todo!() + } + (Spanned(Token::Ident(ident), start), _) => { + let dest = Spanned(Dest::Ident(ident), start); self.expect_token(Token::Eq); - let item = self.parse_item(); let span = start.join(item.1); result.push(Spanned(TopLevel::Assignment(dest, item), span)); } _ => self.log.push(Log { - message: format!("unexpected token {:#}, expected {:}", next.0, Token::Ident("")), + message: format!( + "unexpected token {:#}, expected {:}", + next.0, + Token::Ident("") + ), range: next.1, level: LogLevel::Error, kind: LogKind::Lexer,