From f375757fd6b2d78bee481b8eaa93a0213e31747b Mon Sep 17 00:00:00 2001 From: ParkerTenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Fri, 19 Dec 2025 18:06:21 -0500 Subject: [PATCH] started parsing NPDAs --- example.npda | 19 +++ example.txt | 17 -- src/ast.rs | 50 ------ src/{ => automata}/dfa.rs | 4 +- src/automata/dpda.rs | 1 + src/automata/mod.rs | 85 ++++++++++ src/automata/nfa.rs | 1 + src/automata/npda.rs | 319 +++++++++++++++++++++++++++++++++++++ src/automata/ntm.rs | 1 + src/automata/tm.rs | 1 + src/lib.rs | 14 +- src/loader/ast.rs | 171 ++++++++++++++++++++ src/{ => loader}/lexer.rs | 34 ++-- src/loader/log.rs | 155 ++++++++++++++++++ src/loader/mod.rs | 30 ++++ src/{ => loader}/parser.rs | 215 ++++++++++++------------- src/main.rs | 19 ++- 17 files changed, 920 insertions(+), 216 deletions(-) create mode 100644 example.npda delete mode 100644 example.txt delete mode 100644 src/ast.rs rename src/{ => automata}/dfa.rs (72%) create mode 100644 src/automata/dpda.rs create mode 100644 src/automata/mod.rs create mode 100644 src/automata/nfa.rs create mode 100644 src/automata/npda.rs create mode 100644 src/automata/ntm.rs create mode 100644 src/automata/tm.rs create mode 100644 src/loader/ast.rs rename src/{ => loader}/lexer.rs (91%) create mode 100644 src/loader/log.rs create mode 100644 src/loader/mod.rs rename src/{ => loader}/parser.rs (55%) diff --git a/example.npda b/example.npda new file mode 100644 index 0000000..ba0195e --- /dev/null +++ b/example.npda @@ -0,0 +1,19 @@ + +Q = {q0, q1} // states +E = {a, b} // alphabet +T = {z0, A, B} // stack +I = q0 + +// construct all possible permutations of A's and B's +d(q0, epsilon, z0) = { (q0, [A z0]), (q0, [B z0]) } +d(q0, epsilon, A) = { (q0, [A A]), (q0, [B A]) } +d(q0, epsilon, B) = { (q0, [A B]), (q0, [B B]) } + +// transition to q1 +d(q0, epsilon, z0)={ (q1, z0) } +d(q0, epsilon, A)={ (q1, A) } +d(q0, epsilon, B)={ (q1, B) } + +// consume stack until empty +d(q1, a, A)={(q1, epsilon)} +d(q1, b, B)={(q1, epsilon)} diff --git a/example.txt b/example.txt deleted file mode 100644 index d0526f8..0000000 --- a/example.txt +++ /dev/null @@ -1,17 +0,0 @@ -Q = {q0, q1} // states -E = {a, b} // alphabet -T = {z0, A, B} // stack symbols - -// construct all possible permutations of A's and B's -d(q0, epsilon, z0)={(q0, A z0), (q0, B z0)} -d(q0, epsilon, A)={(q0, A A), (q0, B A)} -d(q0, epsilon, B)={(q0, A B), (q0, B B)} - -// transition to q1 -d(q0, epsilon, z0)={(q1, z0)} -d(q0, epsilon, A)={(q1, A)} -d(q0, epsilon, B)={(q1, B)} - -// consume stack until empty -d(q1, a, A)={(q1, epsilon)} -d(q1, b, B)={(q1, epsilon)} \ No newline at end of file diff --git a/src/ast.rs b/src/ast.rs deleted file mode 100644 index e5c1463..0000000 --- a/src/ast.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::ops::Range; - -use crate::lexer::Spanned; - -#[derive(Clone, Debug)] -pub struct Tuple<'a>(pub Vec>>); - -#[derive(Clone, Debug)] -pub enum Symbol<'a> { - Epsilon, - Ident(&'a str), -} - -#[derive(Clone, Debug)] -pub enum Dest<'a> { - Ident(&'a str), - Function(Spanned<&'a str>, Spanned>), -} - -#[derive(Clone, Debug)] -pub enum Item<'a> { - Symbol(Symbol<'a>), - Tuple(Tuple<'a>), - List(List<'a>), -} - -#[derive(Clone, Debug)] -pub enum Regex<'a>{ - Terminal(&'a str), - Match{ - complement: bool, - patterns: Vec> - }, - Concat(Vec>), - Star(Box>), - Plus(Box>), - Union(Vec>), - Intersection(Vec>), - Complement(Box>), -} - -#[derive(Clone, Debug)] -pub struct List<'a>(pub Vec>>); - -#[derive(Clone, Debug)] -pub enum TopLevel<'a> { - Assignment(Spanned>, Spanned>), - ProductionRule(Spanned>, Spanned>), - Table(), -} \ No newline at end of file diff --git a/src/dfa.rs b/src/automata/dfa.rs similarity index 72% rename from src/dfa.rs rename to src/automata/dfa.rs index 8fb3bb0..95e41dc 100644 --- a/src/dfa.rs +++ b/src/automata/dfa.rs @@ -1,9 +1,9 @@ -use crate::*; +use super::*; pub struct TransitionTable { initial: State, state_names: Vec, - transitions: Vec>, + // transitions: Vec>, final_states: Vec, } diff --git a/src/automata/dpda.rs b/src/automata/dpda.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/automata/dpda.rs @@ -0,0 +1 @@ + diff --git a/src/automata/mod.rs b/src/automata/mod.rs new file mode 100644 index 0000000..17ee186 --- /dev/null +++ b/src/automata/mod.rs @@ -0,0 +1,85 @@ +use std::collections::HashMap; + +pub mod dfa; +pub mod dpda; +pub mod nfa; +pub mod npda; +pub mod ntm; +pub mod tm; + +#[derive(Clone, Debug, Copy, Hash, PartialEq, Eq)] +pub struct State(u16); + +#[derive(Clone, Debug, Copy, Hash, PartialEq, Eq)] +pub struct Symbol(u16); + +#[derive(Clone, Debug)] +pub struct StateMap(Vec); + +trait Get { + type Output; + fn get(&self, index: Idx) -> Option<&Self::Output>; +} + +impl Get for StateMap { + type Output = T; + + fn get(&self, index: State) -> Option<&Self::Output> { + self.0.get(index.0 as usize) + } +} + +#[derive(Clone, Debug)] +pub struct SymbolMap(Vec); + +impl Get for SymbolMap { + type Output = T; + + fn get(&self, index: Symbol) -> Option<&Self::Output> { + self.0.get(index.0 as usize) + } +} + +#[derive(Clone, Debug, Default)] +pub struct StateSymbolMap { + map: Vec, + max_state: u16, +} + +impl Get<(State, Symbol)> for StateSymbolMap { + type Output = T; + + fn get(&self, (state, symbol): (State, Symbol)) -> Option<&Self::Output> { + self.map + .get(state.0 as usize + self.max_state as usize * symbol.0 as usize) + } +} + +#[derive(Clone, Debug, Default)] +pub struct CharMap(HashMap); +impl Get for CharMap { + type Output = T; + + fn get(&self, index: char) -> Option<&Self::Output> { + self.0.get(&index) + } +} + +#[derive(Clone, Debug, Default)] +pub struct CharEpsilonMap(HashMap, T>); + +impl Get for CharEpsilonMap { + type Output = T; + + fn get(&self, index: char) -> Option<&Self::Output> { + self.0.get(&Some(index)) + } +} + +impl Get> for CharEpsilonMap { + type Output = T; + + fn get(&self, index: Option) -> Option<&Self::Output> { + self.0.get(&index) + } +} diff --git a/src/automata/nfa.rs b/src/automata/nfa.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/automata/nfa.rs @@ -0,0 +1 @@ + diff --git a/src/automata/npda.rs b/src/automata/npda.rs new file mode 100644 index 0000000..712fcf2 --- /dev/null +++ b/src/automata/npda.rs @@ -0,0 +1,319 @@ +use std::collections::HashSet; + +use super::*; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +struct To(State, Vec); + +#[derive(Clone, Debug)] +pub struct TransitionTable { + pub(in super::npda) initial_state: State, + initial_stack: Symbol, + state_names: Vec, + symbol_names: Vec, + alphabet: HashSet, + + accept_empty: bool, + final_states: Vec, + transitions: StateSymbolMap>>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct NPDA { + pub state: State, + pub stack: Vec, + pub position: usize, +} + +pub struct Simulator { + input: String, + table: TransitionTable, + running: Vec, +} + +impl Simulator { + pub fn begin(input: impl Into, table: TransitionTable) -> Self { + Self { + input: input.into(), + running: vec![NPDA { + state: table.initial_state, + stack: vec![table.initial_stack], + position: 0, + }], + table, + } + } + + pub fn step(&mut self) -> Option { + let mut new = Vec::new(); + for mut npda in self.running.drain(..) { + let Some(top) = npda.stack.pop() else { + continue; + }; + + for to in self + .table + .transitions + .get((npda.state, top)) + .and_then(|t| t.get(None)) + .iter() + .flat_map(|t| t.iter()) + { + let mut stack = npda.stack.clone(); + stack.extend_from_slice(&to.1); + new.push(NPDA { + state: to.0, + stack, + position: npda.position, + }); + } + + let Some(next) = self + .input + .get(npda.position..) + .and_then(|c| c.chars().next()) + else { + if self.table.final_states[npda.state.0 as usize] + || self.table.accept_empty && npda.stack == [self.table.initial_stack] + { + return Some(npda.clone()); + } else { + continue; + } + }; + + for to in self + .table + .transitions + .get((npda.state, top)) + .and_then(|t| t.get(Some(next))) + .iter() + .flat_map(|t| t.iter()) + { + let mut stack = npda.stack.clone(); + stack.extend_from_slice(&to.1); + new.push(NPDA { + state: to.0, + stack, + position: npda.position + next.len_utf8(), + }); + } + } + self.running = new; + None + } +} + +// ------ parser/semantics + +use crate::loader::{ + DELTA_LOWER, GAMMA_UPPER, SIGMA_UPPER, Spanned, + ast::{self, Symbol as Sym, Tuple}, + lexer::Lexer, + log::Logs, + parser::Parser, +}; + +impl TransitionTable { + pub fn load_table<'a>(input: &'a str) -> Result<(TransitionTable, Logs<'a>), Logs<'a>> { + let (ast, logs) = Parser::new(Lexer::new(input)).parse_elements(); + if logs.contains_errors() { + return Err(logs); + } + Self::load_from_ast(&ast, logs) + } + + pub fn load_from_ast<'a>( + ast: &Vec>>, + mut logs: Logs<'a>, + ) -> Result<(TransitionTable, Logs<'a>), Logs<'a>> { + let mut initial_state = None; + let mut initial_stack = None; + + let mut states = HashSet::new(); + let mut stack_symbols = HashSet::new(); + let mut alphabet = HashSet::new(); + let mut final_states = None; + let mut accept_empty = false; + + for Spanned(element, span) in ast { + use Spanned as S; + use ast::Dest; + use ast::TopLevel as TL; + match element { + TL::Assignment(S(Dest::Ident("Q"), _), list) => { + if !states.is_empty() { + logs.emit_error("states already set", *span); + } + let Some(list) = list.expect_set(&mut logs) else { + continue; + }; + for item in list { + let Some(ident) = item.expect_ident(&mut logs) else { + continue; + }; + if !states.insert(ident) { + logs.emit_error("state redefined", item.1); + } + } + + if list.is_empty(){ + logs.emit_error("states cannot be empty", *span); + } + } + TL::Assignment(S(Dest::Ident("E" | SIGMA_UPPER | "sigma"), _), list) => { + if !alphabet.is_empty() { + logs.emit_error("alphabet already set", *span); + } + let Some(list) = list.expect_set(&mut logs) else { + continue; + }; + for item in list { + let Some(ident) = item.expect_ident(&mut logs) else { + continue; + }; + + if ident.chars().count() != 1 { + logs.emit_error("letter cannot be longer than one char", item.1); + } + + if !alphabet.insert(ident) { + logs.emit_error("letter redefined", item.1); + } + } + if list.is_empty(){ + logs.emit_error("alphabet cannot be empty", *span); + } + } + TL::Assignment(S(Dest::Ident("F"), _), list) => { + let mut map = HashSet::new(); + let Some(list) = list.expect_set(&mut logs) else { + continue; + }; + for item in list { + let Some(ident) = item.expect_ident(&mut logs) else { + continue; + }; + if !states.contains(ident) { + logs.emit_error("final state not defined in set of states", item.1); + } + if !map.insert(ident) { + logs.emit_error("final states redefined", item.1); + } + } + + if final_states.is_some() { + logs.emit_error("final states already set", *span); + } + final_states = Some(map); + } + TL::Assignment(S(Dest::Ident("T" | GAMMA_UPPER | "gamma"), _), list) => { + if !stack_symbols.is_empty() { + logs.emit_error("stack symbols already set", *span); + } + let Some(list) = list.expect_set(&mut logs) else { + continue; + }; + for item in list { + let Some(ident) = item.expect_ident(&mut logs) else { + continue; + }; + if !stack_symbols.insert(ident) { + logs.emit_error("stack symbol redefined", item.1); + } + } + + if list.is_empty(){ + logs.emit_error("stack symbols cannot be empty", *span); + } + } + TL::Assignment(S(Dest::Ident("I" | "q0"), _), S(src, src_d)) => match src { + ast::Item::Symbol(Sym::Ident(ident)) => { + if !states.contains(ident) { + logs.emit_error("initial state symbol not defined as a state", *src_d); + } + if initial_state.is_some() { + logs.emit_error("initial state already set", *span); + } + initial_state = Some(ident) + } + _ => logs.emit_error("expected ident", *src_d), + }, + TL::Assignment(S(Dest::Ident("S" | "z0"), _), S(src, src_d)) => match src { + ast::Item::Symbol(Sym::Ident(ident)) => { + if !stack_symbols.contains(ident) + { + logs.emit_error( + "initial stack symbol not defined as a stack symbol", + *src_d, + ); + } + if initial_stack.is_some() { + logs.emit_error("initial stack already set", *span); + } + initial_stack = Some(ident) + } + _ => logs.emit_error("expected ident", *src_d), + }, + TL::Assignment(S(Dest::Ident(name), dest_s), _) => { + logs.emit_error(format!("unknown item {name:?}, expected 'Q'|'E'|'{SIGMA_UPPER}'|'sigma'|'F'|'T'|'{GAMMA_UPPER}'|'gamma'|'I'|'q0'|'S'|'z0'"), *dest_s); + } + + TL::Assignment( + S(Dest::Function(S("d" | DELTA_LOWER | "delta", _), tuple), _), + list, + ) => { + let list = list.set_weak(); + let Some((state, letter, sym)) = + tuple.as_ref().expect_npda_transition_function(&mut logs) + else { + continue; + }; + if !states.contains(state.0){ + logs.emit_error("transition state not defined as state", state.1); + } + if !stack_symbols.contains(sym.0){ + logs.emit_error("transition stack symbol not defined as stack symbol", sym.1); + } + + for item in list { + let Some((next_state, stack)) = item + .expect_tuple(&mut logs) + .and_then(|item| item.expect_npda_transition(&mut logs)) + else { + continue; + }; + + if !states.contains(next_state.0){ + logs.emit_error("transition state not defined as state", next_state.1); + } + } + } + TL::Assignment(S(Dest::Function(S(name, _), _), dest_s), _) => { + logs.emit_error( + format!("unknown function {name:?}, expected 'd'|'delta'|'{DELTA_LOWER}'"), + *dest_s, + ); + } + + TL::ProductionRule(_, _) => { + logs.emit_error("unexpected production rule", *span); + } + TL::Table() => logs.emit_error("unexpected table", *span), + } + } + + let table = TransitionTable { + initial_state: crate::automata::State(0), + initial_stack: crate::automata::Symbol(0), + state_names: Vec::new(), + symbol_names: Vec::new(), + alphabet: HashSet::new(), + accept_empty: false, + final_states: Vec::new(), + transitions: Default::default(), + }; + + Ok((table, logs)) + } +} diff --git a/src/automata/ntm.rs b/src/automata/ntm.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/automata/ntm.rs @@ -0,0 +1 @@ + diff --git a/src/automata/tm.rs b/src/automata/tm.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/automata/tm.rs @@ -0,0 +1 @@ + diff --git a/src/lib.rs b/src/lib.rs index 5a6efaf..345bf34 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,2 @@ -pub mod dfa; -pub mod lexer; -pub mod parser; -pub mod ast; - -pub struct SymbolMap([T; 256]); - -#[derive(Clone, Copy, Hash, PartialEq, Eq)] -pub struct State(u16); - -#[derive(Clone, Copy, Hash, PartialEq, Eq)] -pub struct Symbol(u16); +pub mod automata; +pub mod loader; diff --git a/src/loader/ast.rs b/src/loader/ast.rs new file mode 100644 index 0000000..d1c36d9 --- /dev/null +++ b/src/loader/ast.rs @@ -0,0 +1,171 @@ +use std::ops::Range; + +use super::Spanned; + +#[derive(Clone, Debug)] +pub struct Tuple<'a>(pub Vec>>); + +#[derive(Clone, Copy, Debug)] +pub enum Symbol<'a> { + Epsilon, + Ident(&'a str), +} + +#[derive(Clone, Debug)] +pub enum Dest<'a> { + Ident(&'a str), + Function(Spanned<&'a str>, Spanned>), +} + +#[derive(Clone, Debug)] +pub enum Item<'a> { + Symbol(Symbol<'a>), + Tuple(Tuple<'a>), + List(List<'a>), +} + +#[derive(Clone, Debug)] +pub enum Regex<'a> { + Terminal(&'a str), + Match { + complement: bool, + patterns: Vec>, + }, + Concat(Vec>), + Star(Box>), + Plus(Box>), + Union(Vec>), + Intersection(Vec>), + Complement(Box>), +} + +#[derive(Clone, Debug)] +pub struct List<'a>(pub Vec>>); + +#[derive(Clone, Debug)] +pub enum TopLevel<'a> { + Assignment(Spanned>, Spanned>), + ProductionRule(Spanned>, Spanned>), + Table(), +} + +use crate::loader::log::Logs; + +impl<'a> Spanned> { + pub fn expect_ident(&self, logs: &mut Logs<'a>) -> Option<&'a str> { + match &self.0 { + Item::Symbol(Symbol::Ident(ident)) => return Some(ident), + Item::Symbol(Symbol::Epsilon) => { + logs.emit_error("expected ident found epsilon", self.1) + } + Item::Tuple(_) => logs.emit_error("expected ident found tuple", self.1), + Item::List(_) => logs.emit_error("expected ident found list", self.1), + } + None + } + + pub fn expect_set(&self, logs: &mut Logs<'a>) -> Option<&[Spanned>]> { + match &self.0 { + Item::Symbol(Symbol::Ident(_)) => logs.emit_error("expected set found ident", self.1), + Item::Symbol(Symbol::Epsilon) => logs.emit_error("expected set found epsilon", self.1), + Item::Tuple(_) => logs.emit_error("expected set found tuple", self.1), + Item::List(list) => return Some(&list.0), + } + None + } + + pub fn expect_list(&self, logs: &mut Logs<'a>) -> Option<&[Spanned>]> { + match &self.0 { + Item::Symbol(Symbol::Ident(_)) => logs.emit_error("expected list found ident", self.1), + Item::Symbol(Symbol::Epsilon) => logs.emit_error("expected list found epsilon", self.1), + Item::Tuple(_) => logs.emit_error("expected list found tuple", self.1), + Item::List(list) => return Some(&list.0), + } + None + } + + pub fn list_weak(&self) -> &[Spanned>] { + match &self.0 { + Item::List(list) => &list.0, + _ => std::slice::from_ref(self), + } + } + + pub fn set_weak(&self) -> &[Spanned>] { + match &self.0 { + Item::List(list) => &list.0, + _ => std::slice::from_ref(self), + } + } + + pub fn expect_tuple(&self, logs: &mut Logs<'a>) -> Option>> { + match &self.0 { + Item::Symbol(Symbol::Ident(_)) => logs.emit_error("expected tuple found ident", self.1), + Item::Symbol(Symbol::Epsilon) => { + logs.emit_error("expected tuple found epsilon", self.1) + } + Item::Tuple(tuple) => return Some(Spanned(tuple, self.1)), + Item::List(_) => logs.emit_error("expected tuple found list", self.1), + } + None + } +} + +impl<'a, 'b> Spanned<&'b Tuple<'a>> { + pub fn expect_dfa_transition(&self, logs: &mut Logs<'a>) -> ! { + todo!() + } + pub fn expect_nfa_transition(&self, logs: &mut Logs<'a>) -> ! { + todo!() + } + + pub fn expect_dpda_transition(&self, logs: &mut Logs<'a>) -> ! { + todo!() + } + + pub fn expect_npda_transition_function( + &self, + logs: &mut Logs<'a>, + ) -> Option<(Spanned<&'a str>, Spanned>, Spanned<&'a str>)> { + match &self.0.0[..] { + [ + Spanned(Item::Symbol(Symbol::Ident(state)), state_span), + Spanned(Item::Symbol(letter), letter_span), + Spanned(Item::Symbol(Symbol::Ident(symbol)), symbol_span), + ] => { + return Some(( + Spanned(state, *state_span), + Spanned(*letter, *letter_span), + Spanned(symbol, *symbol_span), + )); + } + _ => logs.emit_error( + "expected NPDA transition function (ident, ident|~, ident)", + self.1, + ), + } + None + } + pub fn expect_npda_transition( + &self, + logs: &mut Logs<'a>, + ) -> Option<(Spanned<&'a str>, &'b [Spanned>])> { + match &self.0.0[..] { + [ + Spanned(Item::Symbol(Symbol::Ident(state)), state_span), + list, + ] => { + return Some((Spanned(state, *state_span), list.list_weak())); + } + _ => logs.emit_error("expected NPDA transition (ident, item|[item])", self.1), + } + None + } + + pub fn expect_tm_transition(&self, logs: &mut Logs<'a>) -> ! { + todo!() + } + pub fn expect_ntm_transition(&self, logs: &mut Logs<'a>) -> ! { + todo!() + } +} diff --git a/src/lexer.rs b/src/loader/lexer.rs similarity index 91% rename from src/lexer.rs rename to src/loader/lexer.rs index d0200d7..3e7549e 100644 --- a/src/lexer.rs +++ b/src/loader/lexer.rs @@ -1,3 +1,5 @@ +use crate::loader::{Span, Spanned}; + #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] pub enum Token<'a> { LPar, @@ -51,22 +53,6 @@ impl<'a> std::fmt::Display for Token<'a> { } } -#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] -pub struct Span(pub usize, pub usize); -impl Span { - pub fn join(&self, end: Span) -> Span { - Span(self.0, end.1) - } -} - -#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] -pub struct Spanned(pub T, pub Span); -impl Spanned { - pub fn map(self, map: impl Fn(T) -> R) -> Spanned { - Spanned(map(self.0), self.1) - } -} - #[derive(Clone, Copy, Debug)] pub struct Lexer<'a> { input: &'a str, @@ -111,6 +97,18 @@ impl<'a> Lexer<'a> { pub fn eof_span(&self) -> Span { Span(self.input.len(), self.input.len()) } + + pub fn input(&self) -> &'a str { + self.input + } +} + +fn begin_ident(c: char) -> bool { + c.is_alphabetic() || c == '_' || (!c.is_ascii() && !c.is_control() && !c.is_whitespace()) +} + +fn continue_ident(c: char) -> bool { + c.is_alphanumeric() || c == '_' || (!c.is_ascii() && !c.is_control() && !c.is_whitespace()) } impl<'a> std::iter::Iterator for Lexer<'a> { @@ -177,9 +175,9 @@ impl<'a> std::iter::Iterator for Lexer<'a> { None => Err(Error::InvalidChar('/')), }, - c if c.is_alphabetic() || c == '_' => loop { + c if begin_ident(c) => loop { match self.consume() { - Some(c) if c.is_alphanumeric() || c == '_' => {} + Some(c) if continue_ident(c) => {} Some(_) => { self.backtrack(); break Ok(Token::Ident(&self.input[self.start..self.position])); diff --git a/src/loader/log.rs b/src/loader/log.rs new file mode 100644 index 0000000..270dfb2 --- /dev/null +++ b/src/loader/log.rs @@ -0,0 +1,155 @@ +use std::{borrow::Cow, fmt::Display}; + +use crate::loader::Span; + +pub struct Logs<'a> { + logs: Vec, + src: Cow<'a, str>, + has_error: bool, +} + +impl<'a> Logs<'a> { + pub fn new(src: impl Into>) -> Self { + Self { + logs: Vec::new(), + src: src.into(), + has_error: false, + } + } + + pub fn contains_errors(&self) -> bool { + self.has_error + } + + pub fn emit(&mut self, entry: LogEntry) { + self.has_error |= matches!(entry.level, LogLevel::Error); + self.logs.push(entry); + } + + pub fn emit_error(&mut self, msg: impl Into, span: Span) { + self.emit(LogEntry { + message: msg.into(), + span, + level: LogLevel::Error, + }); + } + + pub fn emit_warning(&mut self, msg: impl Into, span: Span) { + self.emit(LogEntry { + message: msg.into(), + span, + level: LogLevel::Warning, + }); + } + + pub fn emit_info(&mut self, msg: impl Into, span: Span) { + self.emit(LogEntry { + message: msg.into(), + span, + level: LogLevel::Info, + }); + } + + pub fn displayable(&self) -> impl Iterator> { + self.logs.iter().map(|entry| LogEntryDisplay { + src: &self.src, + entry, + }) + } +} + +pub enum LogLevel { + Info, + Warning, + Error, +} + +pub struct LogEntry { + pub message: String, + pub span: Span, + pub level: LogLevel, +} + +pub struct LogEntryDisplay<'a> { + src: &'a str, + entry: &'a LogEntry, +} + +impl<'a> Display for LogEntryDisplay<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + pub const RESET: &str = "\x1b[0;22m"; + pub const BOLD: &str = "\x1b[1m"; + // pub const UNDERLINE: &str = "\x1b[4m"; + pub const RED: &str = "\x1b[31m"; + // pub const GREEN: &str = "\x1b[32m"; + pub const YELLOW: &str = "\x1b[33m"; + // pub const BLUE: &str = "\x1b[34m"; + pub const CYAN: &str = "\x1b[36m"; + + match self.entry.level { + LogLevel::Info => write!(f, "{BOLD}{CYAN}info{RESET}{BOLD}: ")?, + LogLevel::Warning => write!(f, "{BOLD}{YELLOW}warning{RESET}{BOLD}: ")?, + LogLevel::Error => write!(f, "{BOLD}{RED}error{RESET}{BOLD}: ")?, + } + writeln!(f, "{}{RESET}", self.entry.message)?; + + let line_start = self + .src + .get(..=self.entry.span.0) + .unwrap_or("") + .lines() + .count(); + let line_end = self + .src + .get(..self.entry.span.1) + .unwrap_or("") + .lines() + .count(); + + let padding = line_end.ilog10() as usize; + + let start = self + .src + .get(..self.entry.span.0) + .and_then(|s| s.rfind('\n')) + .map(|v| v + 1) + .unwrap_or(0); + + let end = self + .src + .get(self.entry.span.1..) + .and_then(|s| s.find('\n')) + .map(|v| v + self.entry.span.1) + .unwrap_or(self.src.len()); + + let mut index = start; + for (i, line) in self.src.get(start..end).unwrap_or("").lines().enumerate() { + write!(f, "{BOLD}{CYAN}{:>padding$}: {RESET}", i + line_start)?; + for char in line.chars() { + if char == '\t' { + write!(f, " ")? + } else { + write!(f, "{char}")? + } + } + writeln!(f)?; + write!(f, "{BOLD}{CYAN}")?; + for _ in 0..padding + 3 { + write!(f, " ")?; + } + for char in line.chars() { + if (self.entry.span.0..self.entry.span.1).contains(&index) { + write!(f, "~")?; + } else { + write!(f, " ")?; + } + index += char.len_utf8(); + } + write!(f, "{RESET}")?; + index += '\n'.len_utf8(); + writeln!(f)?; + } + + Ok(()) + } +} diff --git a/src/loader/mod.rs b/src/loader/mod.rs new file mode 100644 index 0000000..b4dc1a1 --- /dev/null +++ b/src/loader/mod.rs @@ -0,0 +1,30 @@ +pub mod ast; +pub mod lexer; +pub mod log; +pub mod parser; + +pub const EPSILON_LOWER: &str = "Ɛ"; +pub const DELTA_LOWER: &str = "δ"; +pub const SIGMA_UPPER: &str = "Σ"; +pub const GAMMA_UPPER: &str = "Γ"; +pub const GAMMA_LOWER: &str = "γ"; + +#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] +pub struct Span(pub usize, pub usize); +impl Span { + pub fn join(&self, end: Span) -> Span { + Span(self.0, end.1) + } +} + +#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] +pub struct Spanned(pub T, pub Span); +impl Spanned { + pub fn map(self, map: impl Fn(T) -> R) -> Spanned { + Spanned(map(self.0), self.1) + } + + pub fn as_ref(&self) -> Spanned<&T> { + Spanned(&self.0, self.1) + } +} diff --git a/src/parser.rs b/src/loader/parser.rs similarity index 55% rename from src/parser.rs rename to src/loader/parser.rs index c879739..5818650 100644 --- a/src/parser.rs +++ b/src/loader/parser.rs @@ -1,30 +1,14 @@ -use std::{iter::Peekable}; -use crate::ast::*; -use crate::lexer::{Lexer, Span, Spanned, Token}; +use crate::loader::log::{LogEntryDisplay, Logs}; +use crate::loader::{Span, Spanned}; - - -pub enum LogKind { - Lexer, - UnexpectedToken, -} - -pub enum LogLevel { - Info, - Warning, - Error, -} - -pub struct Log { - pub message: String, - pub range: Span, - pub level: LogLevel, - pub kind: LogKind, -} +use super::ast::*; +use super::lexer::{Lexer, Token}; +use std::iter::Peekable; pub struct Parser<'a> { lexer: Peekable>, - log: Vec, + logs: Logs<'a>, + src: &'a str, eof: Span, } @@ -32,8 +16,9 @@ impl<'a> Parser<'a> { pub fn new(lexer: Lexer<'a>) -> Self { Parser { eof: lexer.eof_span(), + src: lexer.input(), + logs: Logs::new(lexer.input()), lexer: lexer.peekable(), - log: Vec::new(), } } @@ -42,12 +27,7 @@ impl<'a> Parser<'a> { match self.lexer.next()? { Spanned(Ok(Token::Comment(_)), _) => {} Spanned(Ok(ok), r) => return Some(Spanned(ok, r)), - Spanned(Err(err), r) => self.log.push(Log { - message: format!("{err:?}"), - range: r, - level: LogLevel::Error, - kind: LogKind::Lexer, - }), + Spanned(Err(err), span) => self.logs.emit_error(format!("lexer: {err:?}"), span), } } } @@ -56,36 +36,25 @@ impl<'a> Parser<'a> { loop { match *self.lexer.peek()? { Spanned(Ok(ok), r) => return Some(Spanned(ok, r)), - Spanned(Err(err), r) => self.log.push(Log { - message: format!("{err:?}"), - range: r, - level: LogLevel::Error, - kind: LogKind::Lexer, - }), + Spanned(Err(err), span) => self.logs.emit_error(format!("lexer: {err:?}"), span), } } } fn expect_token(&mut self, expected: Token<'a>) -> (bool, Span) { - if let Some(Spanned(token, range)) = self.next_token() { + if let Some(Spanned(token, span)) = self.next_token() { if token != expected { - self.log.push(Log { - message: format!("unexpected token {:#}, expected {:}", token, expected), - range, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); - (false, range) + self.logs.emit_error( + format!("unexpected token {:#}, expected {:}", token, expected), + span, + ); + (false, span) } else { - (true, range) + (true, span) } } else { - self.log.push(Log { - message: format!("unexpected eof expected {:#}", expected), - range: self.eof, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); + self.logs + .emit_error(format!("unexpected eof expected {:#}", expected), self.eof); (false, self.eof) } } @@ -94,33 +63,29 @@ impl<'a> Parser<'a> { match self.next_token() { Some(Spanned(Token::Tilde, r)) => Spanned(Symbol::Epsilon, r), Some(Spanned(Token::Ident("epsilon"), r)) => Spanned(Symbol::Epsilon, r), - Some(Spanned(Token::Ident("ε"), r)) => Spanned(Symbol::Epsilon, r), + Some(Spanned(Token::Ident(super::EPSILON_LOWER), r)) => Spanned(Symbol::Epsilon, r), Some(Spanned(Token::Ident(ident), r)) => Spanned(Symbol::Ident(ident), r), - Some(Spanned(got, r)) => { - self.log.push(Log { - message: format!( + Some(Spanned(got, span)) => { + self.logs.emit_error( + format!( "unexpected token {:#}, expected {:}|{:}", got, Token::Tilde, Token::Ident("") ), - range: self.eof, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); - Spanned(Symbol::Ident(""), r) + span, + ); + Spanned(Symbol::Ident(""), span) } None => { - self.log.push(Log { - message: format!( + self.logs.emit_error( + format!( "unexpected eof expected {:}|{:}", Token::Tilde, Token::Ident("") ), - range: self.eof, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); + self.eof, + ); Spanned(Symbol::Ident(""), self.eof) } } @@ -134,17 +99,15 @@ impl<'a> Parser<'a> { } while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) { - items.push(self.parse_symbol()); + items.push(self.parse_item()); if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) { self.next_token(); } if self.peek_token().is_none() { - self.log.push(Log { - message: format!("unexpected eof expected {:}", Token::RPar), - range: self.eof, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); + self.logs.emit_error( + format!("unexpected eof expected {:}", Token::RPar), + self.eof, + ); break; } } @@ -160,36 +123,34 @@ impl<'a> Parser<'a> { self.parse_symbol().map(Item::Symbol) } Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple), - Some(Spanned(Token::LBrace, _)) => self.parse_list().map(Item::List), - Some(Spanned(got, r)) => { - self.log.push(Log { - message: format!( - "unexpected token {:#}, expected {:}|{:}|{:}|{:}", + Some(Spanned(Token::LBrace | Token::LBracket, _)) => self.parse_list().map(Item::List), + Some(Spanned(got, span)) => { + self.logs.emit_error( + format!( + "unexpected token {:#}, expected {:}|{:}|{:}|{:}|{:}", got, Token::Tilde, Token::Ident(""), Token::LPar, - Token::LBrace + Token::LBrace, + Token::LBracket, ), - range: self.eof, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); - Spanned(Item::Symbol(Symbol::Ident("")), r) + span, + ); + Spanned(Item::Symbol(Symbol::Ident("")), span) } None => { - self.log.push(Log { - message: format!( - "unexpected eof expected {:}|{:}|{:}|{:}", + self.logs.emit_error( + format!( + "unexpected eof expected {:}|{:}|{:}|{:}|{:}", Token::Tilde, Token::Ident(""), Token::LPar, - Token::LBrace + Token::LBrace, + Token::LBracket, ), - range: self.eof, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); + self.eof, + ); Spanned(Item::Symbol(Symbol::Ident("")), self.eof) } } @@ -197,35 +158,55 @@ impl<'a> Parser<'a> { pub fn parse_list(&mut self) -> Spanned> { let mut list = Vec::new(); - let (matched, start) = self.expect_token(Token::LBrace); - if !matched { - return Spanned(List(Vec::new()), start); - } - while !matches!(self.peek_token(), Some(Spanned(Token::RBrace, _))) { + let (start, match_end) = match self.next_token() { + Some(Spanned(Token::LBrace, r)) => (r, Token::RBrace), + Some(Spanned(Token::LBracket, r)) => (r, Token::RBracket), + Some(Spanned(got, span)) => { + self.logs.emit_error( + format!( + "unexpected token {:#}, expected {:}|{:}", + got, + Token::RBrace, + Token::RBracket + ), + span, + ); + return Spanned(List(Vec::new()), span); + } + None => { + self.logs.emit_error( + format!( + "unexpected eof expected {:}|{:}", + Token::RBrace, + Token::RBracket + ), + self.eof, + ); + return Spanned(List(Vec::new()), self.eof); + } + }; + + while self.peek_token().map(|t| t.0) != Some(match_end) { list.push(self.parse_item()); if matches!(self.peek_token(), Some(Spanned(Token::Comma, _))) { self.next_token(); } if self.peek_token().is_none() { - self.log.push(Log { - message: format!("unexpected eof expected {:}", Token::RBrace), - range: self.eof, - level: LogLevel::Error, - kind: LogKind::Lexer, - }); + self.logs + .emit_error(format!("unexpected eof expected {:}", match_end), self.eof); break; } } - let (_, end) = self.expect_token(Token::RBrace); + let (_, end) = self.expect_token(match_end); Spanned(List(list), start.join(end)) } - pub fn parse_regex(&mut self) -> Spanned>{ + pub fn parse_regex(&mut self) -> Spanned> { todo!() } - pub fn parse_elements(&mut self) -> Vec>> { + pub fn parse_elements(mut self) -> (Vec>>, Logs<'a>) { let mut result = Vec::new(); loop { @@ -240,7 +221,10 @@ impl<'a> Parser<'a> { let span = start.join(item.1); result.push(Spanned(TopLevel::Assignment(dest, item), span)); } - (Spanned(Token::Ident(_), _), Some(Spanned(Token::LSmallArrow|Token::Ident(_), _))) => { + ( + Spanned(Token::Ident(_), _), + Some(Spanned(Token::LSmallArrow | Token::Ident(_), _)), + ) => { todo!() } (Spanned(Token::Ident(ident), start), _) => { @@ -250,18 +234,21 @@ impl<'a> Parser<'a> { let span = start.join(item.1); result.push(Spanned(TopLevel::Assignment(dest, item), span)); } - _ => self.log.push(Log { - message: format!( + _ => self.logs.emit_error( + format!( "unexpected token {:#}, expected {:}", next.0, Token::Ident("") ), - range: next.1, - level: LogLevel::Error, - kind: LogKind::Lexer, - }), + next.1, + ), } } - result + + (result, self.logs) + } + + pub fn logs(&self) -> impl Iterator> { + self.logs.displayable() } } diff --git a/src/main.rs b/src/main.rs index da8c6f9..0cfeefe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,20 @@ -use automata::{lexer::Lexer, parser::Parser}; +use automata::automata::npda; fn main() { - let input = include_str!("../example.txt"); + let input = include_str!("../example.npda"); - println!("{:#?}", Parser::new(Lexer::new(input)).parse_elements()); + let table = match npda::TransitionTable::load_table(input) { + Ok((ok, logs)) => { + for log in logs.displayable() { + println!("{log}") + } + ok + } + Err(logs) => { + for log in logs.displayable() { + println!("{log}") + } + return; + } + }; }