started parsing NPDAs

This commit is contained in:
ParkerTenBroeck 2025-12-19 18:06:21 -05:00
parent 7e0d4ca69a
commit f375757fd6
17 changed files with 920 additions and 216 deletions

171
src/loader/ast.rs Normal file
View file

@ -0,0 +1,171 @@
use std::ops::Range;
use super::Spanned;
/// An ordered group of spanned items.
///
/// `Parser::parse_tupple` builds this from a parenthesised `( ... )` group.
#[derive(Clone, Debug)]
pub struct Tuple<'a>(pub Vec<Spanned<Item<'a>>>);
/// A single symbol borrowed from the source text.
#[derive(Clone, Copy, Debug)]
pub enum Symbol<'a> {
/// The empty symbol. `Parser::parse_symbol` produces it for `~`, for the
/// identifier `epsilon`, and for the identifier equal to `EPSILON_LOWER`.
Epsilon,
/// Any other identifier, stored as the slice of source text it was read from.
Ident(&'a str),
}
/// The left-hand side of a top-level assignment.
#[derive(Clone, Debug)]
pub enum Dest<'a> {
/// A bare identifier, as in `name = item`.
Ident(&'a str),
/// An identifier followed by a parenthesised argument tuple, as in
/// `name(a, b) = item`. Holds the spanned name and the spanned arguments.
Function(Spanned<&'a str>, Spanned<Tuple<'a>>),
}
/// Any value `Parser::parse_item` can produce.
#[derive(Clone, Debug)]
pub enum Item<'a> {
/// A lone identifier or epsilon.
Symbol(Symbol<'a>),
/// A parenthesised group of items.
Tuple(Tuple<'a>),
/// A `{ ... }` or `[ ... ]` group of items.
List(List<'a>),
}
/// Syntax tree for a regular expression.
///
/// NOTE(review): nothing in the visible parser builds this yet
/// (`Parser::parse_regex` is `todo!()`), so the variant descriptions below are
/// read off the names only — confirm once the parser exists.
#[derive(Clone, Debug)]
pub enum Regex<'a> {
/// A literal piece of source text.
Terminal(&'a str),
/// Presumably a character class: a set of character ranges, optionally negated.
Match {
complement: bool,
// `Range<char>` is half-open (`start..end` excludes `end`).
patterns: Vec<Range<char>>,
},
/// Sub-expressions in sequence.
Concat(Vec<Regex<'a>>),
/// Presumably zero-or-more repetitions of the inner expression.
Star(Box<Regex<'a>>),
/// Presumably one-or-more repetitions of the inner expression.
Plus(Box<Regex<'a>>),
/// Alternatives.
Union(Vec<Regex<'a>>),
/// Presumably expressions that must all match.
Intersection(Vec<Regex<'a>>),
/// Presumably the negation of the inner expression.
Complement(Box<Regex<'a>>),
}
/// An ordered group of spanned items.
///
/// `Parser::parse_list` builds this from either `{ ... }` or `[ ... ]`; which
/// bracket was used is not recorded.
#[derive(Clone, Debug)]
pub struct List<'a>(pub Vec<Spanned<Item<'a>>>);
/// One top-level element of a parsed file.
#[derive(Clone, Debug)]
pub enum TopLevel<'a> {
/// `dest = item`; the only variant `Parser::parse_elements` currently builds.
Assignment(Spanned<Dest<'a>>, Spanned<Item<'a>>),
/// A pair of symbols.
/// NOTE(review): not constructed anywhere in the visible parser yet — confirm
/// which symbol is the left-hand side.
ProductionRule(Spanned<Symbol<'a>>, Spanned<Symbol<'a>>),
/// Placeholder with no payload; not constructed anywhere in the visible parser.
Table(),
}
use crate::loader::log::Logs;
impl<'a> Spanned<Item<'a>> {
pub fn expect_ident(&self, logs: &mut Logs<'a>) -> Option<&'a str> {
match &self.0 {
Item::Symbol(Symbol::Ident(ident)) => return Some(ident),
Item::Symbol(Symbol::Epsilon) => {
logs.emit_error("expected ident found epsilon", self.1)
}
Item::Tuple(_) => logs.emit_error("expected ident found tuple", self.1),
Item::List(_) => logs.emit_error("expected ident found list", self.1),
}
None
}
pub fn expect_set(&self, logs: &mut Logs<'a>) -> Option<&[Spanned<Item<'a>>]> {
match &self.0 {
Item::Symbol(Symbol::Ident(_)) => logs.emit_error("expected set found ident", self.1),
Item::Symbol(Symbol::Epsilon) => logs.emit_error("expected set found epsilon", self.1),
Item::Tuple(_) => logs.emit_error("expected set found tuple", self.1),
Item::List(list) => return Some(&list.0),
}
None
}
pub fn expect_list(&self, logs: &mut Logs<'a>) -> Option<&[Spanned<Item<'a>>]> {
match &self.0 {
Item::Symbol(Symbol::Ident(_)) => logs.emit_error("expected list found ident", self.1),
Item::Symbol(Symbol::Epsilon) => logs.emit_error("expected list found epsilon", self.1),
Item::Tuple(_) => logs.emit_error("expected list found tuple", self.1),
Item::List(list) => return Some(&list.0),
}
None
}
pub fn list_weak(&self) -> &[Spanned<Item<'a>>] {
match &self.0 {
Item::List(list) => &list.0,
_ => std::slice::from_ref(self),
}
}
pub fn set_weak(&self) -> &[Spanned<Item<'a>>] {
match &self.0 {
Item::List(list) => &list.0,
_ => std::slice::from_ref(self),
}
}
pub fn expect_tuple(&self, logs: &mut Logs<'a>) -> Option<Spanned<&Tuple<'a>>> {
match &self.0 {
Item::Symbol(Symbol::Ident(_)) => logs.emit_error("expected tuple found ident", self.1),
Item::Symbol(Symbol::Epsilon) => {
logs.emit_error("expected tuple found epsilon", self.1)
}
Item::Tuple(tuple) => return Some(Spanned(tuple, self.1)),
Item::List(_) => logs.emit_error("expected tuple found list", self.1),
}
None
}
}
impl<'a, 'b> Spanned<&'b Tuple<'a>> {
    /// Not implemented yet: calling this panics via `todo!()`.
    pub fn expect_dfa_transition(&self, logs: &mut Logs<'a>) -> ! {
        todo!()
    }

    /// Not implemented yet: calling this panics via `todo!()`.
    pub fn expect_nfa_transition(&self, logs: &mut Logs<'a>) -> ! {
        todo!()
    }

    /// Not implemented yet: calling this panics via `todo!()`.
    pub fn expect_dpda_transition(&self, logs: &mut Logs<'a>) -> ! {
        todo!()
    }

    /// Destructures a three-element tuple `(ident, symbol, ident)`.
    ///
    /// The first and third elements must be identifiers; the middle one may be
    /// any symbol, epsilon included. Each part is returned with its own span.
    /// Any other shape adds an error covering the whole tuple to `logs` and
    /// returns `None`.
    pub fn expect_npda_transition_function(
        &self,
        logs: &mut Logs<'a>,
    ) -> Option<(Spanned<&'a str>, Spanned<Symbol<'a>>, Spanned<&'a str>)> {
        let [
            Spanned(Item::Symbol(Symbol::Ident(state)), state_span),
            Spanned(Item::Symbol(letter), letter_span),
            Spanned(Item::Symbol(Symbol::Ident(symbol)), symbol_span),
        ] = &self.0.0[..]
        else {
            logs.emit_error(
                "expected NPDA transition function (ident, ident|~, ident)",
                self.1,
            );
            return None;
        };

        Some((
            Spanned(*state, *state_span),
            Spanned(*letter, *letter_span),
            Spanned(*symbol, *symbol_span),
        ))
    }

    /// Destructures a two-element tuple `(ident, item)`.
    ///
    /// Returns the spanned identifier together with the second element viewed
    /// through `list_weak` (a list yields its elements, anything else yields
    /// itself as a one-element slice). Any other shape adds an error covering
    /// the whole tuple to `logs` and returns `None`.
    pub fn expect_npda_transition(
        &self,
        logs: &mut Logs<'a>,
    ) -> Option<(Spanned<&'a str>, &'b [Spanned<Item<'a>>])> {
        let [
            Spanned(Item::Symbol(Symbol::Ident(state)), state_span),
            list,
        ] = &self.0.0[..]
        else {
            logs.emit_error("expected NPDA transition (ident, item|[item])", self.1);
            return None;
        };

        Some((Spanned(*state, *state_span), list.list_weak()))
    }

    /// Not implemented yet: calling this panics via `todo!()`.
    pub fn expect_tm_transition(&self, logs: &mut Logs<'a>) -> ! {
        todo!()
    }

    /// Not implemented yet: calling this panics via `todo!()`.
    pub fn expect_ntm_transition(&self, logs: &mut Logs<'a>) -> ! {
        todo!()
    }
}

213
src/loader/lexer.rs Normal file
View file

@ -0,0 +1,213 @@
use crate::loader::{Span, Spanned};
/// A lexical token. Payload-carrying variants borrow their text from the input.
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
pub enum Token<'a> {
    /// `(`
    LPar,
    /// `)`
    RPar,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `~`
    Tilde,
    /// `=`
    Eq,
    /// `,`
    Comma,
    /// `|`
    Or,
    /// `+`
    Plus,
    /// `*`
    Star,
    /// `&`
    And,
    /// `->`
    LSmallArrow,
    /// `=>`
    LBigArrow,
    /// The text of a `//` or `/* */` comment, without its delimiters.
    Comment(&'a str),
    /// An identifier.
    Ident(&'a str),
}

/// Renders a token for diagnostics.
///
/// Punctuation is shown quoted (for example `'('`). An identifier prints as the
/// word `ident`, or as its quoted text when the alternate flag (`{:#}`) is set.
impl<'a> std::fmt::Display for Token<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            // The two parentheses used to be swapped, which inverted every
            // "expected '('" / "expected ')'" diagnostic.
            Token::LPar => "'('",
            Token::RPar => "')'",
            Token::LBrace => "'{'",
            Token::RBrace => "'}'",
            Token::LBracket => "'['",
            Token::RBracket => "']'",
            Token::Tilde => "'~'",
            Token::Eq => "'='",
            Token::Comma => "','",
            Token::Or => "'|'",
            Token::Plus => "'+'",
            Token::Star => "'*'",
            Token::And => "'&'",
            Token::LSmallArrow => "'->'",
            Token::LBigArrow => "'=>'",
            Token::Comment(_) => "<comment>",
            Token::Ident(ident) if f.alternate() => return write!(f, "{ident:?}"),
            Token::Ident(_) => "ident",
        };
        f.write_str(text)
    }
}
/// Turns source text into spanned tokens; driven through its `Iterator` impl.
#[derive(Clone, Copy, Debug)]
pub struct Lexer<'a> {
// The complete source text being tokenized.
input: &'a str,
// Byte offset at which the token currently being read begins.
start: usize,
// Byte offset of the next unread character.
position: usize,
}
/// Errors the lexer can report in place of a token.
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
pub enum Error {
/// A character that cannot begin a token. Also reported for a `-` that is not
/// followed by `>` and for a `/` that does not start a comment.
InvalidChar(char),
/// A `/*` comment whose closing `*/` was not found before the end of input.
UnclosedMultiLine,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the beginning of `input`.
    pub fn new(input: &'a str) -> Self {
        Self { input, start: 0, position: 0 }
    }

    /// Returns the next character and advances past it; `None` at end of input.
    fn consume(&mut self) -> Option<char> {
        let unread = self.input.get(self.position..)?;
        let next = unread.chars().next()?;
        self.position += next.len_utf8();
        Some(next)
    }

    /// Returns the next character without advancing; `None` at end of input.
    fn peek(&mut self) -> Option<char> {
        let unread = self.input.get(self.position..)?;
        unread.chars().next()
    }

    /// Steps back over the character just before the current position.
    /// Does nothing when already at the start of the input.
    fn backtrack(&mut self) {
        let previous = self
            .input
            .get(..self.position)
            .and_then(|consumed| consumed.chars().next_back());
        if let Some(previous) = previous {
            self.position -= previous.len_utf8();
        }
    }

    /// An empty span located at the very end of the input.
    pub fn eof_span(&self) -> Span {
        let end = self.input.len();
        Span(end, end)
    }

    /// The full source text this lexer reads from.
    pub fn input(&self) -> &'a str {
        self.input
    }
}
/// Whether `c` may start an identifier: an alphabetic character, `_`, or any
/// non-ASCII character that is neither a control character nor whitespace.
fn begin_ident(c: char) -> bool {
    if c == '_' || c.is_alphabetic() {
        return true;
    }
    !(c.is_ascii() || c.is_control() || c.is_whitespace())
}
/// Whether `c` may continue an identifier: an alphanumeric character, `_`, or
/// any non-ASCII character that is neither a control character nor whitespace.
fn continue_ident(c: char) -> bool {
    if c == '_' || c.is_alphanumeric() {
        return true;
    }
    !(c.is_ascii() || c.is_control() || c.is_whitespace())
}
impl<'a> std::iter::Iterator for Lexer<'a> {
type Item = Spanned<Result<Token<'a>, Error>>;
fn next(&mut self) -> Option<Self::Item> {
while let Some(c) = self.peek()
&& c.is_whitespace()
{
self.consume();
}
self.start = self.position;
let res = match self.consume()? {
'(' => Ok(Token::LPar),
')' => Ok(Token::RPar),
'{' => Ok(Token::LBrace),
'}' => Ok(Token::RBrace),
'[' => Ok(Token::LBracket),
']' => Ok(Token::RBracket),
'~' => Ok(Token::Tilde),
'+' => Ok(Token::Plus),
'*' => Ok(Token::Star),
'&' => Ok(Token::And),
',' => Ok(Token::Comma),
'|' => Ok(Token::Or),
'=' => match self.peek() {
Some('>') => {
self.consume();
Ok(Token::LBigArrow)
}
_ => Ok(Token::Eq),
},
'-' => match self.peek() {
Some('>') => {
self.consume();
Ok(Token::LSmallArrow)
}
_ => Err(Error::InvalidChar('-')),
},
'/' => match self.consume() {
Some('/') => loop {
if let Some('\n') | None = self.consume() {
break Ok(Token::Comment(&self.input[self.start + 2..self.position]));
}
},
Some('*') => loop {
match self.consume() {
Some('*') if self.peek() == Some('/') => {
self.consume();
break Ok(Token::Comment(
&self.input[self.start + 2..self.position - 2],
));
}
Some(_) => {}
None => break Err(Error::UnclosedMultiLine),
}
},
Some(_) => {
self.backtrack();
Err(Error::InvalidChar('/'))
}
None => Err(Error::InvalidChar('/')),
},
c if begin_ident(c) => loop {
match self.consume() {
Some(c) if continue_ident(c) => {}
Some(_) => {
self.backtrack();
break Ok(Token::Ident(&self.input[self.start..self.position]));
}
None => break Ok(Token::Ident(&self.input[self.start..self.position])),
}
},
c => Err(Error::InvalidChar(c)),
};
let span = Span(self.start, self.position);
self.start = self.position;
Some(Spanned(res, span))
}
}
/// Pins the exact token stream the lexer produces for a handful of edge cases
/// (empty input, unterminated and empty comments, a lone `/`) and for one line
/// exercising every punctuation token.
#[test]
fn tokenizer() {
    /// Lexes `src` and compares the tokens (spans ignored) with `expected`.
    fn check(src: &str, expected: &[Result<Token<'_>, Error>]) {
        let actual: Vec<_> = Lexer::new(src).map(|Spanned(token, _)| token).collect();
        assert_eq!(actual.as_slice(), expected, "input: {src:?}");
    }

    check("", &[]);
    check("/*", &[Err(Error::UnclosedMultiLine)]);
    check("/**", &[Err(Error::UnclosedMultiLine)]);
    check("/*/", &[Err(Error::UnclosedMultiLine)]);
    check("/**/", &[Ok(Token::Comment(""))]);
    check("/", &[Err(Error::InvalidChar('/'))]);
    check("//", &[Ok(Token::Comment(""))]);
    check(
        "()[]{}~=>==>->-+*&|, hello _th012is__ a wondweful",
        &[
            Ok(Token::LPar),
            Ok(Token::RPar),
            Ok(Token::LBracket),
            Ok(Token::RBracket),
            Ok(Token::LBrace),
            Ok(Token::RBrace),
            Ok(Token::Tilde),
            Ok(Token::LBigArrow),
            Ok(Token::Eq),
            Ok(Token::LBigArrow),
            Ok(Token::LSmallArrow),
            // A `-` that is not part of `->` is rejected on its own.
            Err(Error::InvalidChar('-')),
            Ok(Token::Plus),
            Ok(Token::Star),
            Ok(Token::And),
            Ok(Token::Or),
            Ok(Token::Comma),
            Ok(Token::Ident("hello")),
            Ok(Token::Ident("_th012is__")),
            Ok(Token::Ident("a")),
            Ok(Token::Ident("wondweful")),
        ],
    );
}

155
src/loader/log.rs Normal file
View file

@ -0,0 +1,155 @@
use std::{borrow::Cow, fmt::Display};
use crate::loader::Span;
/// Collects diagnostics together with the source text they refer to.
pub struct Logs<'a> {
// Entries in the order they were emitted.
logs: Vec<LogEntry>,
// Source text that entry spans index into; borrowed or owned.
src: Cow<'a, str>,
// Set once any entry with `LogLevel::Error` has been emitted.
has_error: bool,
}
impl<'a> Logs<'a> {
pub fn new(src: impl Into<Cow<'a, str>>) -> Self {
Self {
logs: Vec::new(),
src: src.into(),
has_error: false,
}
}
pub fn contains_errors(&self) -> bool {
self.has_error
}
pub fn emit(&mut self, entry: LogEntry) {
self.has_error |= matches!(entry.level, LogLevel::Error);
self.logs.push(entry);
}
pub fn emit_error(&mut self, msg: impl Into<String>, span: Span) {
self.emit(LogEntry {
message: msg.into(),
span,
level: LogLevel::Error,
});
}
pub fn emit_warning(&mut self, msg: impl Into<String>, span: Span) {
self.emit(LogEntry {
message: msg.into(),
span,
level: LogLevel::Warning,
});
}
pub fn emit_info(&mut self, msg: impl Into<String>, span: Span) {
self.emit(LogEntry {
message: msg.into(),
span,
level: LogLevel::Info,
});
}
pub fn displayable(&self) -> impl Iterator<Item = LogEntryDisplay<'_>> {
self.logs.iter().map(|entry| LogEntryDisplay {
src: &self.src,
entry,
})
}
}
/// Severity of a log entry.
///
/// The level selects the coloured label printed in front of the message, and
/// `Error` is the level that makes `Logs::contains_errors` return `true`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    /// Informational note.
    Info,
    /// Something suspicious that does not count as an error.
    Warning,
    /// A problem; marks the whole log as containing errors.
    Error,
}
/// A single diagnostic.
pub struct LogEntry {
/// Text shown after the level label.
pub message: String,
/// Byte range of the source text the diagnostic refers to.
pub span: Span,
/// Severity of the diagnostic.
pub level: LogLevel,
}
/// Pairs a log entry with the source text so it can be rendered via `Display`.
/// Created by `Logs::displayable`.
pub struct LogEntryDisplay<'a> {
// Source text that `entry.span` indexes into.
src: &'a str,
// The entry to render.
entry: &'a LogEntry,
}
impl<'a> Display for LogEntryDisplay<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
pub const RESET: &str = "\x1b[0;22m";
pub const BOLD: &str = "\x1b[1m";
// pub const UNDERLINE: &str = "\x1b[4m";
pub const RED: &str = "\x1b[31m";
// pub const GREEN: &str = "\x1b[32m";
pub const YELLOW: &str = "\x1b[33m";
// pub const BLUE: &str = "\x1b[34m";
pub const CYAN: &str = "\x1b[36m";
match self.entry.level {
LogLevel::Info => write!(f, "{BOLD}{CYAN}info{RESET}{BOLD}: ")?,
LogLevel::Warning => write!(f, "{BOLD}{YELLOW}warning{RESET}{BOLD}: ")?,
LogLevel::Error => write!(f, "{BOLD}{RED}error{RESET}{BOLD}: ")?,
}
writeln!(f, "{}{RESET}", self.entry.message)?;
let line_start = self
.src
.get(..=self.entry.span.0)
.unwrap_or("")
.lines()
.count();
let line_end = self
.src
.get(..self.entry.span.1)
.unwrap_or("")
.lines()
.count();
let padding = line_end.ilog10() as usize;
let start = self
.src
.get(..self.entry.span.0)
.and_then(|s| s.rfind('\n'))
.map(|v| v + 1)
.unwrap_or(0);
let end = self
.src
.get(self.entry.span.1..)
.and_then(|s| s.find('\n'))
.map(|v| v + self.entry.span.1)
.unwrap_or(self.src.len());
let mut index = start;
for (i, line) in self.src.get(start..end).unwrap_or("").lines().enumerate() {
write!(f, "{BOLD}{CYAN}{:>padding$}: {RESET}", i + line_start)?;
for char in line.chars() {
if char == '\t' {
write!(f, " ")?
} else {
write!(f, "{char}")?
}
}
writeln!(f)?;
write!(f, "{BOLD}{CYAN}")?;
for _ in 0..padding + 3 {
write!(f, " ")?;
}
for char in line.chars() {
if (self.entry.span.0..self.entry.span.1).contains(&index) {
write!(f, "~")?;
} else {
write!(f, " ")?;
}
index += char.len_utf8();
}
write!(f, "{RESET}")?;
index += '\n'.len_utf8();
writeln!(f)?;
}
Ok(())
}
}

30
src/loader/mod.rs Normal file
View file

@ -0,0 +1,30 @@
pub mod ast;
pub mod lexer;
pub mod log;
pub mod parser;
/// Identifier the parser treats as epsilon, alongside `~` and the word `epsilon`.
// NOTE(review): this literal appears to be U+0190 (Latin capital open E), not
// the Greek small epsilon U+03B5 the name suggests — confirm which was intended.
pub const EPSILON_LOWER: &str = "Ɛ";
/// Greek small letter delta.
pub const DELTA_LOWER: &str = "δ";
/// Greek capital letter sigma.
pub const SIGMA_UPPER: &str = "Σ";
/// Greek capital letter gamma.
pub const GAMMA_UPPER: &str = "Γ";
/// Greek small letter gamma.
pub const GAMMA_LOWER: &str = "γ";
/// A region of the source text given as two byte offsets: `.0` is where it
/// begins and `.1` is where it ends. The lexer builds spans from the offset of
/// a token's first byte and the offset just past its last byte.
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
pub struct Span(pub usize, pub usize);

impl Span {
    /// Returns a span that begins where `self` begins and ends where `end`
    /// ends. The two offsets are taken as-is; they are neither ordered nor
    /// validated.
    pub fn join(&self, end: Span) -> Span {
        let Span(from, _) = *self;
        let Span(_, to) = end;
        Span(from, to)
    }
}
/// A value (`.0`) together with the span of source text it came from (`.1`).
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
pub struct Spanned<T>(pub T, pub Span);

impl<T> Spanned<T> {
    /// Transforms the value and keeps the span.
    ///
    /// The closure is invoked exactly once, so any `FnOnce` is accepted —
    /// including closures that move captured state into the result.
    pub fn map<R>(self, map: impl FnOnce(T) -> R) -> Spanned<R> {
        let Spanned(value, span) = self;
        Spanned(map(value), span)
    }

    /// Borrows the value, producing a `Spanned<&T>` with the same span.
    pub fn as_ref(&self) -> Spanned<&T> {
        Spanned(&self.0, self.1)
    }
}

254
src/loader/parser.rs Normal file
View file

@ -0,0 +1,254 @@
use crate::loader::log::{LogEntryDisplay, Logs};
use crate::loader::{Span, Spanned};
use super::ast::*;
use super::lexer::{Lexer, Token};
use std::iter::Peekable;
/// Parser state: a peekable token stream plus the diagnostics collected so far.
pub struct Parser<'a> {
// Token source with one item of lookahead.
lexer: Peekable<Lexer<'a>>,
// Diagnostics emitted while lexing and parsing.
logs: Logs<'a>,
// The source text being parsed; stored by `new` and not read in this impl.
src: &'a str,
// Empty span at the end of the input, used for "unexpected eof" errors.
eof: Span,
}
impl<'a> Parser<'a> {
pub fn new(lexer: Lexer<'a>) -> Self {
Parser {
eof: lexer.eof_span(),
src: lexer.input(),
logs: Logs::new(lexer.input()),
lexer: lexer.peekable(),
}
}
fn next_token(&mut self) -> Option<Spanned<Token<'a>>> {
loop {
match self.lexer.next()? {
Spanned(Ok(Token::Comment(_)), _) => {}
Spanned(Ok(ok), r) => return Some(Spanned(ok, r)),
Spanned(Err(err), span) => self.logs.emit_error(format!("lexer: {err:?}"), span),
}
}
}
fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> {
loop {
match *self.lexer.peek()? {
Spanned(Ok(ok), r) => return Some(Spanned(ok, r)),
Spanned(Err(err), span) => self.logs.emit_error(format!("lexer: {err:?}"), span),
}
}
}
fn expect_token(&mut self, expected: Token<'a>) -> (bool, Span) {
if let Some(Spanned(token, span)) = self.next_token() {
if token != expected {
self.logs.emit_error(
format!("unexpected token {:#}, expected {:}", token, expected),
span,
);
(false, span)
} else {
(true, span)
}
} else {
self.logs
.emit_error(format!("unexpected eof expected {:#}", expected), self.eof);
(false, self.eof)
}
}
pub fn parse_symbol(&mut self) -> Spanned<Symbol<'a>> {
match self.next_token() {
Some(Spanned(Token::Tilde, r)) => Spanned(Symbol::Epsilon, r),
Some(Spanned(Token::Ident("epsilon"), r)) => Spanned(Symbol::Epsilon, r),
Some(Spanned(Token::Ident(super::EPSILON_LOWER), r)) => Spanned(Symbol::Epsilon, r),
Some(Spanned(Token::Ident(ident), r)) => Spanned(Symbol::Ident(ident), r),
Some(Spanned(got, span)) => {
self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}|{:}",
got,
Token::Tilde,
Token::Ident("")
),
span,
);
Spanned(Symbol::Ident("<INVALID>"), span)
}
None => {
self.logs.emit_error(
format!(
"unexpected eof expected {:}|{:}",
Token::Tilde,
Token::Ident("")
),
self.eof,
);
Spanned(Symbol::Ident("<INVALID>"), self.eof)
}
}
}
pub fn parse_tupple(&mut self) -> Spanned<Tuple<'a>> {
let mut items = Vec::new();
let (matched, start) = self.expect_token(Token::LPar);
if !matched {
return Spanned(Tuple(Vec::new()), start);
}
while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) {
items.push(self.parse_item());
if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) {
self.next_token();
}
if self.peek_token().is_none() {
self.logs.emit_error(
format!("unexpected eof expected {:}", Token::RPar),
self.eof,
);
break;
}
}
let (_, end) = self.expect_token(Token::RPar);
Spanned(Tuple(items), start.join(end))
}
pub fn parse_item(&mut self) -> Spanned<Item<'a>> {
match self.peek_token() {
Some(Spanned(Token::Ident(_) | Token::Tilde, _)) => {
self.parse_symbol().map(Item::Symbol)
}
Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple),
Some(Spanned(Token::LBrace | Token::LBracket, _)) => self.parse_list().map(Item::List),
Some(Spanned(got, span)) => {
self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}|{:}|{:}|{:}|{:}",
got,
Token::Tilde,
Token::Ident(""),
Token::LPar,
Token::LBrace,
Token::LBracket,
),
span,
);
Spanned(Item::Symbol(Symbol::Ident("<INVALID>")), span)
}
None => {
self.logs.emit_error(
format!(
"unexpected eof expected {:}|{:}|{:}|{:}|{:}",
Token::Tilde,
Token::Ident(""),
Token::LPar,
Token::LBrace,
Token::LBracket,
),
self.eof,
);
Spanned(Item::Symbol(Symbol::Ident("<INVALID>")), self.eof)
}
}
}
pub fn parse_list(&mut self) -> Spanned<List<'a>> {
let mut list = Vec::new();
let (start, match_end) = match self.next_token() {
Some(Spanned(Token::LBrace, r)) => (r, Token::RBrace),
Some(Spanned(Token::LBracket, r)) => (r, Token::RBracket),
Some(Spanned(got, span)) => {
self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}|{:}",
got,
Token::RBrace,
Token::RBracket
),
span,
);
return Spanned(List(Vec::new()), span);
}
None => {
self.logs.emit_error(
format!(
"unexpected eof expected {:}|{:}",
Token::RBrace,
Token::RBracket
),
self.eof,
);
return Spanned(List(Vec::new()), self.eof);
}
};
while self.peek_token().map(|t| t.0) != Some(match_end) {
list.push(self.parse_item());
if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) {
self.next_token();
}
if self.peek_token().is_none() {
self.logs
.emit_error(format!("unexpected eof expected {:}", match_end), self.eof);
break;
}
}
let (_, end) = self.expect_token(match_end);
Spanned(List(list), start.join(end))
}
pub fn parse_regex(&mut self) -> Spanned<Regex<'a>> {
todo!()
}
pub fn parse_elements(mut self) -> (Vec<Spanned<TopLevel<'a>>>, Logs<'a>) {
let mut result = Vec::new();
loop {
let Some(next) = self.next_token() else { break };
match (next, self.peek_token()) {
(Spanned(Token::Ident(ident), start), Some(Spanned(Token::LPar, _))) => {
let tuple = self.parse_tupple();
let span = start.join(tuple.1);
let dest = Spanned(Dest::Function(Spanned(ident, start), tuple), span);
self.expect_token(Token::Eq);
let item = self.parse_item();
let span = start.join(item.1);
result.push(Spanned(TopLevel::Assignment(dest, item), span));
}
(
Spanned(Token::Ident(_), _),
Some(Spanned(Token::LSmallArrow | Token::Ident(_), _)),
) => {
todo!()
}
(Spanned(Token::Ident(ident), start), _) => {
let dest = Spanned(Dest::Ident(ident), start);
self.expect_token(Token::Eq);
let item = self.parse_item();
let span = start.join(item.1);
result.push(Spanned(TopLevel::Assignment(dest, item), span));
}
_ => self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}",
next.0,
Token::Ident("")
),
next.1,
),
}
}
(result, self.logs)
}
pub fn logs(&self) -> impl Iterator<Item = LogEntryDisplay<'_>> {
self.logs.displayable()
}
}