moved AST into separate file

This commit is contained in:
ParkerTenBroeck 2025-12-18 22:46:16 -05:00
parent 7970cb197a
commit 7e0d4ca69a
6 changed files with 116 additions and 83 deletions

View file

@ -5,7 +5,6 @@
# Replace llvmPackages with llvmPackages_X, where X is the latest LLVM version (at the time of writing, 16)
llvmPackages.bintools
rustup
nasm
];
RUSTC_VERSION = "nightly";
# https://github.com/rust-lang/rust-bindgen#environment-variables

50
src/ast.rs Normal file
View file

@ -0,0 +1,50 @@
use std::ops::Range;
use crate::lexer::Spanned;
/// A sequence of spanned [`Symbol`]s.
///
/// The single public field holds the elements in the order they are stored.
#[derive(Debug, Clone)]
pub struct Tuple<'a>(
    pub Vec<Spanned<Symbol<'a>>>,
);
/// A single symbol of the AST.
#[derive(Debug, Clone)]
pub enum Symbol<'a> {
    /// The epsilon symbol; carries no payload.
    // NOTE(review): presumably produced from the `~` token — confirm against the parser.
    Epsilon,
    /// A named symbol, borrowing its text for the lifetime `'a`.
    Ident(&'a str),
}
/// The destination (left-hand side) of a [`TopLevel::Assignment`].
#[derive(Debug, Clone)]
pub enum Dest<'a> {
    /// A plain identifier.
    Ident(&'a str),
    /// An identifier followed by a tuple: the spanned name, then the spanned
    /// [`Tuple`] that accompanies it.
    Function(Spanned<&'a str>, Spanned<Tuple<'a>>),
}
/// A value that can appear as the second component of a
/// [`TopLevel::Assignment`] or as an element of a [`List`].
#[derive(Debug, Clone)]
pub enum Item<'a> {
    /// A single [`Symbol`].
    Symbol(Symbol<'a>),
    /// A [`Tuple`] of symbols.
    Tuple(Tuple<'a>),
    /// A nested [`List`] of items.
    List(List<'a>),
}
#[derive(Clone, Debug)]
pub enum Regex<'a>{
Terminal(&'a str),
Match{
complement: bool,
patterns: Vec<Range<char>>
},
Concat(Vec<Regex<'a>>),
Star(Box<Regex<'a>>),
Plus(Box<Regex<'a>>),
Union(Vec<Regex<'a>>),
Intersection(Vec<Regex<'a>>),
Complement(Box<Regex<'a>>),
}
/// A sequence of spanned [`Item`]s.
///
/// Because [`Item::List`] wraps this type, lists can nest.
#[derive(Debug, Clone)]
pub struct List<'a>(
    pub Vec<Spanned<Item<'a>>>,
);
/// A top-level element of the AST.
#[derive(Debug, Clone)]
pub enum TopLevel<'a> {
    /// An assignment: the spanned destination, then the spanned item assigned to it.
    Assignment(Spanned<Dest<'a>>, Spanned<Item<'a>>),
    /// A production rule relating two spanned symbols.
    // NOTE(review): presumably `lhs -> rhs` — confirm operand roles once the parser builds it.
    ProductionRule(Spanned<Symbol<'a>>, Spanned<Symbol<'a>>),
    /// A table element; currently carries no data.
    Table(),
}

View file

@ -29,21 +29,21 @@ pub enum Token<'a> {
impl<'a> std::fmt::Display for Token<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Token::LPar => write!(f, ")"),
Token::RPar => write!(f, "("),
Token::LBrace => write!(f, "{{"),
Token::RBrace => write!(f, "}}"),
Token::LBracket => write!(f, "["),
Token::RBracket => write!(f, "]"),
Token::Tilde => write!(f, "~"),
Token::Eq => write!(f, "="),
Token::Comma => write!(f, ","),
Token::Or => write!(f, "|"),
Token::Plus => write!(f, "+"),
Token::Star => write!(f, "*"),
Token::And => write!(f, "&"),
Token::LSmallArrow => write!(f, "->"),
Token::LBigArrow => write!(f, "=>"),
// The lexer maps `(` to `LPar` and `)` to `RPar`; each token must print its own
// character so diagnostics such as "expected ')'" name the right bracket.
Token::LPar => write!(f, "'('"),
Token::RPar => write!(f, "')'"),
Token::LBrace => write!(f, "'{{'"),
Token::RBrace => write!(f, "'}}'"),
Token::LBracket => write!(f, "'['"),
Token::RBracket => write!(f, "']'"),
Token::Tilde => write!(f, "'~'"),
Token::Eq => write!(f, "'='"),
Token::Comma => write!(f, "','"),
Token::Or => write!(f, "'|'"),
Token::Plus => write!(f, "'+'"),
Token::Star => write!(f, "'*'"),
Token::And => write!(f, "'&'"),
Token::LSmallArrow => write!(f, "'->'"),
Token::LBigArrow => write!(f, "'=>'"),
Token::Comment(_) => write!(f, "<comment>"),
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
Token::Ident(_) => write!(f, "ident"),
@ -126,7 +126,7 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
let res = match self.consume()? {
'(' => Ok(Token::LPar),
')' => Ok(Token::LPar),
')' => Ok(Token::RPar),
'{' => Ok(Token::LBrace),
'}' => Ok(Token::RBrace),
'[' => Ok(Token::LBracket),

View file

@ -1,6 +1,7 @@
pub mod dfa;
pub mod lexer;
pub mod parser;
pub mod ast;
/// A fixed-size table holding exactly 256 values of type `T`.
// NOTE(review): 256 slots presumably means one entry per possible byte value — confirm.
pub struct SymbolMap<T>([T; 256]);

View file

@ -1,37 +1,8 @@
use std::iter::Peekable;
use std::{iter::Peekable};
use crate::ast::*;
use crate::lexer::{Lexer, Span, Spanned, Token};
#[derive(Clone, Debug)]
pub struct Tuple<'a>(pub Vec<Spanned<Symbol<'a>>>);
#[derive(Clone, Debug)]
pub enum Symbol<'a> {
Epsilon,
Ident(&'a str),
}
#[derive(Clone, Debug)]
pub enum Dest<'a> {
Ident(&'a str),
Function(Spanned<&'a str>, Spanned<Tuple<'a>>),
}
#[derive(Clone, Debug)]
pub enum Item<'a> {
Symbol(Symbol<'a>),
Tuple(Tuple<'a>),
List(List<'a>)
}
#[derive(Clone, Debug)]
pub struct List<'a>(pub Vec<Spanned<Item<'a>>>);
#[derive(Clone, Debug)]
pub enum TopLevel<'a> {
Assignment(Spanned<Dest<'a>>, Spanned<Item<'a>>),
Table(),
}
pub enum LogKind {
Lexer,
@ -58,8 +29,12 @@ pub struct Parser<'a> {
}
impl<'a> Parser<'a> {
pub fn new(lexer: Lexer<'a>) -> Self{
Parser { eof: lexer.eof_span(), lexer: lexer.peekable(), log: Vec::new() }
pub fn new(lexer: Lexer<'a>) -> Self {
Parser {
eof: lexer.eof_span(),
lexer: lexer.peekable(),
log: Vec::new(),
}
}
fn next_token(&mut self) -> Option<Spanned<Token<'a>>> {
@ -80,7 +55,6 @@ impl<'a> Parser<'a> {
fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> {
loop {
match *self.lexer.peek()? {
// not a heavy clone but because of range
Spanned(Ok(ok), r) => return Some(Spanned(ok, r)),
Spanned(Err(err), r) => self.log.push(Log {
message: format!("{err:?}"),
@ -102,7 +76,7 @@ impl<'a> Parser<'a> {
kind: LogKind::Lexer,
});
(false, range)
}else{
} else {
(true, range)
}
} else {
@ -155,8 +129,8 @@ impl<'a> Parser<'a> {
pub fn parse_tupple(&mut self) -> Spanned<Tuple<'a>> {
let mut items = Vec::new();
let (matched, start) = self.expect_token(Token::LPar);
if !matched{
return Spanned(Tuple(Vec::new()), start)
if !matched {
return Spanned(Tuple(Vec::new()), start);
}
while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) {
@ -164,12 +138,9 @@ impl<'a> Parser<'a> {
if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) {
self.next_token();
}
if self.peek_token().is_none(){
if self.peek_token().is_none() {
self.log.push(Log {
message: format!(
"unexpected eof expected {:}",
Token::RPar
),
message: format!("unexpected eof expected {:}", Token::RPar),
range: self.eof,
level: LogLevel::Error,
kind: LogKind::Lexer,
@ -183,9 +154,11 @@ impl<'a> Parser<'a> {
Spanned(Tuple(items), start.join(end))
}
pub fn parse_item(&mut self) -> Spanned<Item<'a>>{
match self.peek_token(){
Some(Spanned(Token::Ident(_)|Token::Tilde, _)) => self.parse_symbol().map(Item::Symbol),
pub fn parse_item(&mut self) -> Spanned<Item<'a>> {
match self.peek_token() {
Some(Spanned(Token::Ident(_) | Token::Tilde, _)) => {
self.parse_symbol().map(Item::Symbol)
}
Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple),
Some(Spanned(Token::LBrace, _)) => self.parse_list().map(Item::List),
Some(Spanned(got, r)) => {
@ -222,11 +195,11 @@ impl<'a> Parser<'a> {
}
}
pub fn parse_list(&mut self) -> Spanned<List<'a>>{
pub fn parse_list(&mut self) -> Spanned<List<'a>> {
let mut list = Vec::new();
let (matched, start) = self.expect_token(Token::LBrace);
if !matched{
return Spanned(List(Vec::new()), start)
if !matched {
return Spanned(List(Vec::new()), start);
}
while !matches!(self.peek_token(), Some(Spanned(Token::RBrace, _))) {
@ -234,12 +207,9 @@ impl<'a> Parser<'a> {
if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) {
self.next_token();
}
if self.peek_token().is_none(){
if self.peek_token().is_none() {
self.log.push(Log {
message: format!(
"unexpected eof expected {:}",
Token::RBrace
),
message: format!("unexpected eof expected {:}", Token::RBrace),
range: self.eof,
level: LogLevel::Error,
kind: LogKind::Lexer,
@ -251,28 +221,41 @@ impl<'a> Parser<'a> {
Spanned(List(list), start.join(end))
}
/// Parses a regular expression into a spanned [`Regex`].
///
/// # Panics
///
/// Not implemented yet: every call panics through `todo!()`.
pub fn parse_regex(&mut self) -> Spanned<Regex<'a>> {
    todo!()
}
pub fn parse_elements(&mut self) -> Vec<Spanned<TopLevel<'a>>> {
let mut result = Vec::new();
loop {
let Some(next) = self.next_token() else { break };
match next {
Spanned(Token::Ident(ident), ident_range) => {
let dest @ Spanned(_, start) = if matches!(self.peek_token(), Some(Spanned(Token::LPar, _))) {
match (next, self.peek_token()) {
(Spanned(Token::Ident(ident), start), Some(Spanned(Token::LPar, _))) => {
let tuple = self.parse_tupple();
let span = ident_range.join(tuple.1);
Spanned(Dest::Function(Spanned(ident, ident_range), tuple), span)
} else {
Spanned(Dest::Ident(ident), ident_range)
};
let span = start.join(tuple.1);
let dest = Spanned(Dest::Function(Spanned(ident, start), tuple), span);
self.expect_token(Token::Eq);
let item = self.parse_item();
let span = start.join(item.1);
result.push(Spanned(TopLevel::Assignment(dest, item), span));
}
(Spanned(Token::Ident(_), _), Some(Spanned(Token::LSmallArrow|Token::Ident(_), _))) => {
todo!()
}
(Spanned(Token::Ident(ident), start), _) => {
let dest = Spanned(Dest::Ident(ident), start);
self.expect_token(Token::Eq);
let item = self.parse_item();
let span = start.join(item.1);
result.push(Spanned(TopLevel::Assignment(dest, item), span));
}
_ => self.log.push(Log {
message: format!("unexpected token {:#}, expected {:}", next.0, Token::Ident("")),
message: format!(
"unexpected token {:#}, expected {:}",
next.0,
Token::Ident("")
),
range: next.1,
level: LogLevel::Error,
kind: LogKind::Lexer,