moved AST into separate file

This commit is contained in:
ParkerTenBroeck 2025-12-18 22:46:16 -05:00
parent 7970cb197a
commit 7e0d4ca69a
6 changed files with 116 additions and 83 deletions

View file

@ -5,7 +5,6 @@
# Replace llvmPackages with llvmPackages_X, where X is the latest LLVM version (at the time of writing, 16) # Replace llvmPackages with llvmPackages_X, where X is the latest LLVM version (at the time of writing, 16)
llvmPackages.bintools llvmPackages.bintools
rustup rustup
nasm
]; ];
RUSTC_VERSION = "nightly"; RUSTC_VERSION = "nightly";
# https://github.com/rust-lang/rust-bindgen#environment-variables # https://github.com/rust-lang/rust-bindgen#environment-variables

50
src/ast.rs Normal file
View file

@ -0,0 +1,50 @@
use std::ops::Range;
use crate::lexer::Spanned;
/// An ordered sequence of spanned [`Symbol`]s.
///
/// Newtype over `Vec<Spanned<Symbol>>`; the inner vector is public, so
/// callers can read or build the elements directly. The parser's
/// `parse_tupple` returns this for a group opened by `Token::LPar`.
#[derive(Clone, Debug)]
pub struct Tuple<'a>(pub Vec<Spanned<Symbol<'a>>>);
/// A single symbol in the parsed input.
///
/// The `'a` lifetime ties identifier text to the string slice it was
/// borrowed from, so no allocation is needed to hold a name.
#[derive(Clone, Debug)]
pub enum Symbol<'a> {
    /// The epsilon symbol; carries no data.
    ///
    /// NOTE(review): presumably produced for `~`, since `parse_item` routes
    /// `Token::Tilde` to `parse_symbol` -- confirm in the parser.
    Epsilon,
    /// A named symbol, stored as a borrowed string slice.
    Ident(&'a str),
}
/// The target of an assignment, i.e. the part the parser reads before `=`.
#[derive(Clone, Debug)]
pub enum Dest<'a> {
    /// A bare identifier.
    Ident(&'a str),
    /// An identifier immediately followed by a parenthesised [`Tuple`].
    ///
    /// The first field is the name with its own span; the second is the
    /// tuple with its span.
    Function(Spanned<&'a str>, Spanned<Tuple<'a>>),
}
/// A value: the right-hand side of an assignment or an element of a
/// [`List`].
#[derive(Clone, Debug)]
pub enum Item<'a> {
    /// A lone [`Symbol`].
    Symbol(Symbol<'a>),
    /// A [`Tuple`] of symbols.
    Tuple(Tuple<'a>),
    /// A nested [`List`] of items.
    List(List<'a>),
}
/// Syntax tree for a regular expression.
///
/// NOTE(review): nothing visible constructs or consumes this type yet
/// (`parse_regex` is still `todo!()`), so the per-variant descriptions below
/// follow the variant names and should be confirmed once a consumer exists.
#[derive(Clone, Debug)]
pub enum Regex<'a> {
    /// A literal piece of text, borrowed from the input.
    Terminal(&'a str),
    /// A character class.
    Match {
        /// Presumably `true` when the class is negated -- TODO confirm.
        complement: bool,
        /// Character ranges making up the class. `Range<char>` is half-open:
        /// the end bound itself is excluded.
        patterns: Vec<Range<char>>,
    },
    /// Sub-expressions in sequence.
    Concat(Vec<Regex<'a>>),
    /// The `*` operator applied to the boxed sub-expression.
    Star(Box<Regex<'a>>),
    /// The `+` operator applied to the boxed sub-expression.
    Plus(Box<Regex<'a>>),
    /// Alternation between the listed sub-expressions.
    Union(Vec<Regex<'a>>),
    /// Intersection of the listed sub-expressions.
    Intersection(Vec<Regex<'a>>),
    /// Complement of the boxed sub-expression.
    Complement(Box<Regex<'a>>),
}
/// An ordered sequence of spanned [`Item`]s.
///
/// Newtype over `Vec<Spanned<Item>>`; the inner vector is public. The
/// parser's `parse_list` returns this for a group opened by `Token::LBrace`.
#[derive(Clone, Debug)]
pub struct List<'a>(pub Vec<Spanned<Item<'a>>>);
/// One top-level element of a parsed input, as collected by
/// `Parser::parse_elements`.
#[derive(Clone, Debug)]
pub enum TopLevel<'a> {
    /// An assignment: the destination, then the value assigned to it.
    Assignment(Spanned<Dest<'a>>, Spanned<Item<'a>>),
    /// A production rule relating two symbols.
    ///
    /// NOTE(review): the parser arm that would build this is still `todo!()`,
    /// so which field is the left- and which the right-hand side is
    /// unconfirmed.
    ProductionRule(Spanned<Symbol<'a>>, Spanned<Symbol<'a>>),
    /// A table element; currently carries no data.
    Table(),
}

View file

@ -29,21 +29,21 @@ pub enum Token<'a> {
impl<'a> std::fmt::Display for Token<'a> { impl<'a> std::fmt::Display for Token<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Token::LPar => write!(f, ")"), Token::LPar => write!(f, "')'"),
Token::RPar => write!(f, "("), Token::RPar => write!(f, "'('"),
Token::LBrace => write!(f, "{{"), Token::LBrace => write!(f, "'{{'"),
Token::RBrace => write!(f, "}}"), Token::RBrace => write!(f, "'}}'"),
Token::LBracket => write!(f, "["), Token::LBracket => write!(f, "'['"),
Token::RBracket => write!(f, "]"), Token::RBracket => write!(f, "']'"),
Token::Tilde => write!(f, "~"), Token::Tilde => write!(f, "'~'"),
Token::Eq => write!(f, "="), Token::Eq => write!(f, "'='"),
Token::Comma => write!(f, ","), Token::Comma => write!(f, "','"),
Token::Or => write!(f, "|"), Token::Or => write!(f, "'|'"),
Token::Plus => write!(f, "+"), Token::Plus => write!(f, "'+'"),
Token::Star => write!(f, "*"), Token::Star => write!(f, "'*'"),
Token::And => write!(f, "&"), Token::And => write!(f, "'&'"),
Token::LSmallArrow => write!(f, "->"), Token::LSmallArrow => write!(f, "'->'"),
Token::LBigArrow => write!(f, "=>"), Token::LBigArrow => write!(f, "'=>'"),
Token::Comment(_) => write!(f, "<comment>"), Token::Comment(_) => write!(f, "<comment>"),
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"), Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
Token::Ident(_) => write!(f, "ident"), Token::Ident(_) => write!(f, "ident"),
@ -126,7 +126,7 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
let res = match self.consume()? { let res = match self.consume()? {
'(' => Ok(Token::LPar), '(' => Ok(Token::LPar),
')' => Ok(Token::LPar), ')' => Ok(Token::RPar),
'{' => Ok(Token::LBrace), '{' => Ok(Token::LBrace),
'}' => Ok(Token::RBrace), '}' => Ok(Token::RBrace),
'[' => Ok(Token::LBracket), '[' => Ok(Token::LBracket),

View file

@ -1,6 +1,7 @@
pub mod dfa; pub mod dfa;
pub mod lexer; pub mod lexer;
pub mod parser; pub mod parser;
pub mod ast;
pub struct SymbolMap<T>([T; 256]); pub struct SymbolMap<T>([T; 256]);

View file

@ -2,6 +2,6 @@ use automata::{lexer::Lexer, parser::Parser};
fn main() { fn main() {
let input = include_str!("../example.txt"); let input = include_str!("../example.txt");
println!("{:#?}", Parser::new(Lexer::new(input)).parse_elements()); println!("{:#?}", Parser::new(Lexer::new(input)).parse_elements());
} }

View file

@ -1,37 +1,8 @@
use std::iter::Peekable; use std::{iter::Peekable};
use crate::ast::*;
use crate::lexer::{Lexer, Span, Spanned, Token}; use crate::lexer::{Lexer, Span, Spanned, Token};
#[derive(Clone, Debug)]
pub struct Tuple<'a>(pub Vec<Spanned<Symbol<'a>>>);
#[derive(Clone, Debug)]
pub enum Symbol<'a> {
Epsilon,
Ident(&'a str),
}
#[derive(Clone, Debug)]
pub enum Dest<'a> {
Ident(&'a str),
Function(Spanned<&'a str>, Spanned<Tuple<'a>>),
}
#[derive(Clone, Debug)]
pub enum Item<'a> {
Symbol(Symbol<'a>),
Tuple(Tuple<'a>),
List(List<'a>)
}
#[derive(Clone, Debug)]
pub struct List<'a>(pub Vec<Spanned<Item<'a>>>);
#[derive(Clone, Debug)]
pub enum TopLevel<'a> {
Assignment(Spanned<Dest<'a>>, Spanned<Item<'a>>),
Table(),
}
pub enum LogKind { pub enum LogKind {
Lexer, Lexer,
@ -58,8 +29,12 @@ pub struct Parser<'a> {
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
pub fn new(lexer: Lexer<'a>) -> Self{ pub fn new(lexer: Lexer<'a>) -> Self {
Parser { eof: lexer.eof_span(), lexer: lexer.peekable(), log: Vec::new() } Parser {
eof: lexer.eof_span(),
lexer: lexer.peekable(),
log: Vec::new(),
}
} }
fn next_token(&mut self) -> Option<Spanned<Token<'a>>> { fn next_token(&mut self) -> Option<Spanned<Token<'a>>> {
@ -80,7 +55,6 @@ impl<'a> Parser<'a> {
fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> { fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> {
loop { loop {
match *self.lexer.peek()? { match *self.lexer.peek()? {
// not a heavy clone but because of range
Spanned(Ok(ok), r) => return Some(Spanned(ok, r)), Spanned(Ok(ok), r) => return Some(Spanned(ok, r)),
Spanned(Err(err), r) => self.log.push(Log { Spanned(Err(err), r) => self.log.push(Log {
message: format!("{err:?}"), message: format!("{err:?}"),
@ -92,7 +66,7 @@ impl<'a> Parser<'a> {
} }
} }
fn expect_token(&mut self, expected: Token<'a>) -> (bool, Span) { fn expect_token(&mut self, expected: Token<'a>) -> (bool, Span) {
if let Some(Spanned(token, range)) = self.next_token() { if let Some(Spanned(token, range)) = self.next_token() {
if token != expected { if token != expected {
self.log.push(Log { self.log.push(Log {
@ -102,7 +76,7 @@ impl<'a> Parser<'a> {
kind: LogKind::Lexer, kind: LogKind::Lexer,
}); });
(false, range) (false, range)
}else{ } else {
(true, range) (true, range)
} }
} else { } else {
@ -155,8 +129,8 @@ impl<'a> Parser<'a> {
pub fn parse_tupple(&mut self) -> Spanned<Tuple<'a>> { pub fn parse_tupple(&mut self) -> Spanned<Tuple<'a>> {
let mut items = Vec::new(); let mut items = Vec::new();
let (matched, start) = self.expect_token(Token::LPar); let (matched, start) = self.expect_token(Token::LPar);
if !matched{ if !matched {
return Spanned(Tuple(Vec::new()), start) return Spanned(Tuple(Vec::new()), start);
} }
while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) { while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) {
@ -164,12 +138,9 @@ impl<'a> Parser<'a> {
if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) { if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) {
self.next_token(); self.next_token();
} }
if self.peek_token().is_none(){ if self.peek_token().is_none() {
self.log.push(Log { self.log.push(Log {
message: format!( message: format!("unexpected eof expected {:}", Token::RPar),
"unexpected eof expected {:}",
Token::RPar
),
range: self.eof, range: self.eof,
level: LogLevel::Error, level: LogLevel::Error,
kind: LogKind::Lexer, kind: LogKind::Lexer,
@ -183,9 +154,11 @@ impl<'a> Parser<'a> {
Spanned(Tuple(items), start.join(end)) Spanned(Tuple(items), start.join(end))
} }
pub fn parse_item(&mut self) -> Spanned<Item<'a>>{ pub fn parse_item(&mut self) -> Spanned<Item<'a>> {
match self.peek_token(){ match self.peek_token() {
Some(Spanned(Token::Ident(_)|Token::Tilde, _)) => self.parse_symbol().map(Item::Symbol), Some(Spanned(Token::Ident(_) | Token::Tilde, _)) => {
self.parse_symbol().map(Item::Symbol)
}
Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple), Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple),
Some(Spanned(Token::LBrace, _)) => self.parse_list().map(Item::List), Some(Spanned(Token::LBrace, _)) => self.parse_list().map(Item::List),
Some(Spanned(got, r)) => { Some(Spanned(got, r)) => {
@ -222,11 +195,11 @@ impl<'a> Parser<'a> {
} }
} }
pub fn parse_list(&mut self) -> Spanned<List<'a>>{ pub fn parse_list(&mut self) -> Spanned<List<'a>> {
let mut list = Vec::new(); let mut list = Vec::new();
let (matched, start) = self.expect_token(Token::LBrace); let (matched, start) = self.expect_token(Token::LBrace);
if !matched{ if !matched {
return Spanned(List(Vec::new()), start) return Spanned(List(Vec::new()), start);
} }
while !matches!(self.peek_token(), Some(Spanned(Token::RBrace, _))) { while !matches!(self.peek_token(), Some(Spanned(Token::RBrace, _))) {
@ -234,12 +207,9 @@ impl<'a> Parser<'a> {
if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) { if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) {
self.next_token(); self.next_token();
} }
if self.peek_token().is_none(){ if self.peek_token().is_none() {
self.log.push(Log { self.log.push(Log {
message: format!( message: format!("unexpected eof expected {:}", Token::RBrace),
"unexpected eof expected {:}",
Token::RBrace
),
range: self.eof, range: self.eof,
level: LogLevel::Error, level: LogLevel::Error,
kind: LogKind::Lexer, kind: LogKind::Lexer,
@ -251,28 +221,41 @@ impl<'a> Parser<'a> {
Spanned(List(list), start.join(end)) Spanned(List(list), start.join(end))
} }
/// Parses a regular expression and returns it as a spanned `Regex`.
///
/// # Panics
///
/// Not implemented yet: the body is `todo!()`, so every call panics.
pub fn parse_regex(&mut self) -> Spanned<Regex<'a>>{
todo!()
}
pub fn parse_elements(&mut self) -> Vec<Spanned<TopLevel<'a>>> { pub fn parse_elements(&mut self) -> Vec<Spanned<TopLevel<'a>>> {
let mut result = Vec::new(); let mut result = Vec::new();
loop { loop {
let Some(next) = self.next_token() else { break }; let Some(next) = self.next_token() else { break };
match next { match (next, self.peek_token()) {
Spanned(Token::Ident(ident), ident_range) => { (Spanned(Token::Ident(ident), start), Some(Spanned(Token::LPar, _))) => {
let dest @ Spanned(_, start) = if matches!(self.peek_token(), Some(Spanned(Token::LPar, _))) { let tuple = self.parse_tupple();
let tuple = self.parse_tupple(); let span = start.join(tuple.1);
let span = ident_range.join(tuple.1); let dest = Spanned(Dest::Function(Spanned(ident, start), tuple), span);
Spanned(Dest::Function(Spanned(ident, ident_range), tuple), span) self.expect_token(Token::Eq);
} else { let item = self.parse_item();
Spanned(Dest::Ident(ident), ident_range) let span = start.join(item.1);
}; result.push(Spanned(TopLevel::Assignment(dest, item), span));
}
(Spanned(Token::Ident(_), _), Some(Spanned(Token::LSmallArrow|Token::Ident(_), _))) => {
todo!()
}
(Spanned(Token::Ident(ident), start), _) => {
let dest = Spanned(Dest::Ident(ident), start);
self.expect_token(Token::Eq); self.expect_token(Token::Eq);
let item = self.parse_item(); let item = self.parse_item();
let span = start.join(item.1); let span = start.join(item.1);
result.push(Spanned(TopLevel::Assignment(dest, item), span)); result.push(Spanned(TopLevel::Assignment(dest, item), span));
} }
_ => self.log.push(Log { _ => self.log.push(Log {
message: format!("unexpected token {:#}, expected {:}", next.0, Token::Ident("")), message: format!(
"unexpected token {:#}, expected {:}",
next.0,
Token::Ident("")
),
range: next.1, range: next.1,
level: LogLevel::Error, level: LogLevel::Error,
kind: LogKind::Lexer, kind: LogKind::Lexer,