diff --git a/example.npda b/example.npda index ba0195e..e228825 100644 --- a/example.npda +++ b/example.npda @@ -2,18 +2,21 @@ Q = {q0, q1} // states E = {a, b} // alphabet T = {z0, A, B} // stack -I = q0 +q0 = q0 +z0 = z0 // construct all possible permutations of A's and B's d(q0, epsilon, z0) = { (q0, [A z0]), (q0, [B z0]) } d(q0, epsilon, A) = { (q0, [A A]), (q0, [B A]) } + d(q0, epsilon, B) = { (q0, [A B]), (q0, [B B]) } // transition to q1 -d(q0, epsilon, z0)={ (q1, z0) } -d(q0, epsilon, A)={ (q1, A) } -d(q0, epsilon, B)={ (q1, B) } +d(q0, epsilon, z0) = { (q1, z0) } +d(q0, epsilon, A) = { (q1, A) } + +//d(q0, epsilon, B) = { (q1, B) } // consume stack until empty -d(q1, a, A)={(q1, epsilon)} -d(q1, b, B)={(q1, epsilon)} +d(q1, a, A) = { (q1, epsilon) } +d(q1, b, B) = { (q1, epsilon) } diff --git a/src/automata/mod.rs b/src/automata/mod.rs index 17ee186..89d2718 100644 --- a/src/automata/mod.rs +++ b/src/automata/mod.rs @@ -7,38 +7,76 @@ pub mod npda; pub mod ntm; pub mod tm; +pub trait Get{ + type Output; + fn get(&self, idx: Idx) -> Option<&Self::Output>; + fn get_mut(&mut self, idx: Idx) -> Option<&mut Self::Output>; +} + +pub trait GetDefault{ + type Output: Default; + fn get_or_insert_default(&mut self, idx: Idx) -> &Self::Output; + fn get_mut_or_insert_default(&mut self, idx: Idx) -> &mut Self::Output; +} + +macro_rules! index { + ($ty: ident, $self:ident, $collection: expr, $index_calc: expr, $index: pat = $index_ty: ty $(, $default: expr)?) => { + impl Get<$index_ty> for $ty { + type Output = T; + fn get(&$self, $index: $index_ty) -> Option<&T>{ + $collection.get($index_calc) + } + + fn get_mut(&mut $self, $index: $index_ty) -> Option<&mut T>{ + $collection.get_mut($index_calc) + } + } + + impl std::ops::Index<$index_ty> for $ty{ + type Output = T; + + fn index(& $self, $index: $index_ty) -> &T{ + $collection.get($index_calc).unwrap() + } + } + + impl std::ops::IndexMut<$index_ty> for $ty{ + fn index_mut(&mut $self, $index: $index_ty) -> &mut T{ + $collection.get_mut($index_calc).unwrap() + } + } + + $( + impl GetDefault<$index_ty> for $ty { + type Output = T; + fn get_or_insert_default(&mut $self, $index: $index_ty) -> &T{ + $default + } + + fn get_mut_or_insert_default(&mut $self, $index: $index_ty) -> &mut T{ + $default + } + } + )? + }; +} + #[derive(Clone, Debug, Copy, Hash, PartialEq, Eq)] pub struct State(u16); #[derive(Clone, Debug, Copy, Hash, PartialEq, Eq)] pub struct Symbol(u16); + #[derive(Clone, Debug)] pub struct StateMap(Vec); -trait Get { - type Output; - fn get(&self, index: Idx) -> Option<&Self::Output>; -} - -impl Get for StateMap { - type Output = T; - - fn get(&self, index: State) -> Option<&Self::Output> { - self.0.get(index.0 as usize) - } -} +index!(StateMap, self, self.0, index.0 as usize, index = State); #[derive(Clone, Debug)] pub struct SymbolMap(Vec); -impl Get for SymbolMap { - type Output = T; - - fn get(&self, index: Symbol) -> Option<&Self::Output> { - self.0.get(index.0 as usize) - } -} +index!(SymbolMap, self, self.0, index.0 as usize, index = Symbol); #[derive(Clone, Debug, Default)] pub struct StateSymbolMap { @@ -46,40 +84,18 @@ pub struct StateSymbolMap { max_state: u16, } -impl Get<(State, Symbol)> for StateSymbolMap { - type Output = T; - fn get(&self, (state, symbol): (State, Symbol)) -> Option<&Self::Output> { - self.map - .get(state.0 as usize + self.max_state as usize * symbol.0 as usize) - } -} +index!(StateSymbolMap, self, self.map, state.0 as usize + self.max_state as usize * symbol.0 as usize, (state, symbol) = (State, Symbol)); +index!(StateSymbolMap, self, self.map, state.0 as usize + self.max_state as usize * symbol.0 as usize, (symbol, state) = (Symbol, State)); + #[derive(Clone, Debug, Default)] pub struct CharMap(HashMap); -impl Get for CharMap { - type Output = T; - fn get(&self, index: char) -> Option<&Self::Output> { - self.0.get(&index) - } -} +index!(CharMap, self, self.0, &char, char = char, self.0.entry(char).or_default()); #[derive(Clone, Debug, Default)] pub struct CharEpsilonMap(HashMap, T>); -impl Get for CharEpsilonMap { - type Output = T; - - fn get(&self, index: char) -> Option<&Self::Output> { - self.0.get(&Some(index)) - } -} - -impl Get> for CharEpsilonMap { - type Output = T; - - fn get(&self, index: Option) -> Option<&Self::Output> { - self.0.get(&index) - } -} +index!(CharEpsilonMap, self, self.0, &Some(char), char = char, self.0.entry(Some(char)).or_default()); +index!(CharEpsilonMap, self, self.0, &char, char = Option, self.0.entry(char).or_default()); \ No newline at end of file diff --git a/src/automata/npda.rs b/src/automata/npda.rs index 712fcf2..4d14906 100644 --- a/src/automata/npda.rs +++ b/src/automata/npda.rs @@ -7,14 +7,13 @@ struct To(State, Vec); #[derive(Clone, Debug)] pub struct TransitionTable { - pub(in super::npda) initial_state: State, + initial_state: State, initial_stack: Symbol, - state_names: Vec, - symbol_names: Vec, + state_names: StateMap, + symbol_names: SymbolMap, alphabet: HashSet, - accept_empty: bool, - final_states: Vec, + final_states: Option>, transitions: StateSymbolMap>>, } @@ -31,8 +30,15 @@ pub struct Simulator { running: Vec, } +pub enum SimulatorResult{ + Pending, + Reject, + Accept(NPDA) +} + impl Simulator { pub fn begin(input: impl Into, table: TransitionTable) -> Self { + Self { input: input.into(), running: vec![NPDA { @@ -44,7 +50,8 @@ impl Simulator { } } - pub fn step(&mut self) -> Option { + pub fn step(&mut self) -> SimulatorResult { + println!("step, ({}) paths", self.running.len()); let mut new = Vec::new(); for mut npda in self.running.drain(..) { let Some(top) = npda.stack.pop() else { @@ -73,10 +80,12 @@ impl Simulator { .get(npda.position..) .and_then(|c| c.chars().next()) else { - if self.table.final_states[npda.state.0 as usize] - || self.table.accept_empty && npda.stack == [self.table.initial_stack] + if let Some(final_states) = &self.table.final_states + && final_states.get(npda.state).copied().unwrap_or_default() { - return Some(npda.clone()); + return SimulatorResult::Accept(npda.clone()); + } else if npda.stack == [self.table.initial_stack] { + return SimulatorResult::Accept(npda.clone()); } else { continue; } @@ -100,7 +109,11 @@ impl Simulator { } } self.running = new; - None + if self.running.is_empty(){ + SimulatorResult::Reject + }else{ + SimulatorResult::Pending + } } } @@ -108,7 +121,7 @@ impl Simulator { use crate::loader::{ DELTA_LOWER, GAMMA_UPPER, SIGMA_UPPER, Spanned, - ast::{self, Symbol as Sym, Tuple}, + ast::{self, Symbol as Sym}, lexer::Lexer, log::Logs, parser::Parser, @@ -130,11 +143,12 @@ impl TransitionTable { let mut initial_state = None; let mut initial_stack = None; - let mut states = HashSet::new(); - let mut stack_symbols = HashSet::new(); + let mut states = HashMap::new(); + let mut stack_symbols = HashMap::new(); let mut alphabet = HashSet::new(); let mut final_states = None; - let mut accept_empty = false; + + let mut transitions_map = HashMap::new(); for Spanned(element, span) in ast { use Spanned as S; @@ -152,12 +166,19 @@ impl TransitionTable { let Some(ident) = item.expect_ident(&mut logs) else { continue; }; - if !states.insert(ident) { + let state = match states.len().try_into() { + Ok(ok) => State(ok), + Err(_) => { + logs.emit_error("too many states defined", item.1); + State(0) + } + }; + if states.insert(ident, state).is_some() { logs.emit_error("state redefined", item.1); } - } + } - if list.is_empty(){ + if list.is_empty() { logs.emit_error("states cannot be empty", *span); } } @@ -177,15 +198,18 @@ impl TransitionTable { logs.emit_error("letter cannot be longer than one char", item.1); } - if !alphabet.insert(ident) { + if !alphabet.insert(ident.chars().next().unwrap_or_default()) { logs.emit_error("letter redefined", item.1); } } - if list.is_empty(){ + if list.is_empty() { logs.emit_error("alphabet cannot be empty", *span); } } TL::Assignment(S(Dest::Ident("F"), _), list) => { + if final_states.is_some() { + logs.emit_error("final states already set", *span); + } let mut map = HashSet::new(); let Some(list) = list.expect_set(&mut logs) else { continue; @@ -194,16 +218,13 @@ impl TransitionTable { let Some(ident) = item.expect_ident(&mut logs) else { continue; }; - if !states.contains(ident) { + if let Some(state) = states.get(ident){ + if !map.insert(*state) { + logs.emit_error("final state redefined", item.1); + } + } else{ logs.emit_error("final state not defined in set of states", item.1); } - if !map.insert(ident) { - logs.emit_error("final states redefined", item.1); - } - } - - if final_states.is_some() { - logs.emit_error("final states already set", *span); } final_states = Some(map); } @@ -218,40 +239,48 @@ impl TransitionTable { let Some(ident) = item.expect_ident(&mut logs) else { continue; }; - if !stack_symbols.insert(ident) { + let symbol = match stack_symbols.len().try_into() { + Ok(ok) => Symbol(ok), + Err(_) => { + logs.emit_error("too many stack symbols defined", item.1); + Symbol(0) + } + }; + if stack_symbols.insert(ident, symbol).is_some() { logs.emit_error("stack symbol redefined", item.1); } } - if list.is_empty(){ + if list.is_empty() { logs.emit_error("stack symbols cannot be empty", *span); } } TL::Assignment(S(Dest::Ident("I" | "q0"), _), S(src, src_d)) => match src { ast::Item::Symbol(Sym::Ident(ident)) => { - if !states.contains(ident) { - logs.emit_error("initial state symbol not defined as a state", *src_d); - } if initial_state.is_some() { logs.emit_error("initial state already set", *span); } - initial_state = Some(ident) + if let Some(initial) = states.get(ident) { + initial_state = Some(*initial) + } else { + logs.emit_error("initial state symbol not defined as a state", *src_d); + } } _ => logs.emit_error("expected ident", *src_d), }, TL::Assignment(S(Dest::Ident("S" | "z0"), _), S(src, src_d)) => match src { ast::Item::Symbol(Sym::Ident(ident)) => { - if !stack_symbols.contains(ident) - { + if initial_stack.is_some() { + logs.emit_error("initial stack already set", *span); + } + if let Some(initial) = stack_symbols.get(ident) { + initial_stack = Some(*initial) + } else { logs.emit_error( "initial stack symbol not defined as a stack symbol", *src_d, ); } - if initial_stack.is_some() { - logs.emit_error("initial stack already set", *span); - } - initial_stack = Some(ident) } _ => logs.emit_error("expected ident", *src_d), }, @@ -264,18 +293,39 @@ impl TransitionTable { list, ) => { let list = list.set_weak(); - let Some((state, letter, sym)) = + let Some((state, letter, stack_symbol)) = tuple.as_ref().expect_npda_transition_function(&mut logs) else { continue; }; - if !states.contains(state.0){ + let Some(state) = states.get(state.0).copied() else{ logs.emit_error("transition state not defined as state", state.1); - } - if !stack_symbols.contains(sym.0){ - logs.emit_error("transition stack symbol not defined as stack symbol", sym.1); - } - + continue; + }; + let Some(stack_symbol) = stack_symbols.get(stack_symbol.0).copied() else { + logs.emit_error( + "transition stack symbol not defined as stack symbol", + stack_symbol.1, + ); + continue; + }; + + let char = match letter.0 { + Sym::Epsilon => None, + Sym::Ident(val) => if let Some(char) = val.chars().next() && val.chars().count() == 1 { + if !alphabet.contains(&char){ + logs.emit_error("transition letter not defined in alphabet", letter.1); + } + Some(char) + }else{ + logs.emit_error( + "transition letter can only be single character", + letter.1, + ); + None + }, + }; + for item in list { let Some((next_state, stack)) = item .expect_tuple(&mut logs) @@ -284,9 +334,28 @@ impl TransitionTable { continue; }; - if !states.contains(next_state.0){ + let Some(next_state) = states.get(next_state.0).copied() else { logs.emit_error("transition state not defined as state", next_state.1); - } + continue; + }; + + let stack: Vec<_> = stack.iter().rev().filter_map(|symbol|{ + if matches!(symbol.0, ast::Item::Symbol(Sym::Epsilon)) { + return None; + } + let ident = symbol.expect_ident(&mut logs)?; + + let Some(symbol) = stack_symbols.get(ident).copied() else{ + logs.emit_error("transition stack symbol not defined", symbol.1); + return None; + }; + Some(symbol) + }).collect(); + + transitions_map + .entry((state, char, stack_symbol)) + .or_insert(Vec::new()) + .push((next_state, stack)) } } TL::Assignment(S(Dest::Function(S(name, _), _), dest_s), _) => { @@ -303,15 +372,87 @@ impl TransitionTable { } } + if stack_symbols.is_empty() { + logs.emit_error_locless("stack symbols never defined"); + } + + if alphabet.is_empty() { + logs.emit_error_locless("alphabet never defined"); + } + + if states.is_empty() { + logs.emit_error_locless("states never defined"); + } + + let initial_stack = match initial_stack { + Some(some) => some, + None => { + if let Some(initial) = stack_symbols.get("z0") { + logs.emit_warning_locless( + "initial stack symbol not defined, defaulting to 'z0'", + ); + *initial + } else { + logs.emit_error_locless("initial stack symbol not defined"); + Symbol(0) + } + } + }; + + let initial_state = match initial_state { + Some(some) => some, + None => { + if let Some(initial) = states.get("z0") { + logs.emit_warning_locless("initial state not defined, defaulting to 'q0'"); + *initial + } else { + logs.emit_error_locless("initial state not defined"); + State(0) + } + } + }; + + let state_names = StateMap(states.iter().fold( + vec![String::new(); states.len()], + |mut a, (k, v)| { + a[v.0 as usize] = k.to_string(); + a + }, + )); + let symbol_names = SymbolMap(stack_symbols.iter().fold( + vec![String::new(); stack_symbols.len()], + |mut a, (k, v)| { + a[v.0 as usize] = k.to_string(); + a + }, + )); + let final_states = final_states.map(|f|{ + StateMap(f.iter().fold(vec![false; states.len()], |mut a, k|{ + a[k.0 as usize] = true; + a + })) + }); + + let mut transitions: StateSymbolMap>> = StateSymbolMap{ + map: vec![CharEpsilonMap::default(); stack_symbols.len() * states.len()], + max_state: states.len() as u16, + }; + + for ((q, c, s), to) in transitions_map{ + let from = &mut transitions[(q, s)]; + for (n, ss) in to{ + from.get_mut_or_insert_default(c).push(To(n, ss)); + } + } + let table = TransitionTable { - initial_state: crate::automata::State(0), - initial_stack: crate::automata::Symbol(0), - state_names: Vec::new(), - symbol_names: Vec::new(), - alphabet: HashSet::new(), - accept_empty: false, - final_states: Vec::new(), - transitions: Default::default(), + initial_state, + initial_stack, + state_names, + symbol_names, + alphabet, + final_states, + transitions, }; Ok((table, logs)) diff --git a/src/loader/log.rs b/src/loader/log.rs index 270dfb2..8498828 100644 --- a/src/loader/log.rs +++ b/src/loader/log.rs @@ -26,10 +26,18 @@ impl<'a> Logs<'a> { self.logs.push(entry); } + pub fn emit_error_locless(&mut self, msg: impl Into) { + self.emit(LogEntry { + message: msg.into(), + span: None, + level: LogLevel::Error, + }); + } + pub fn emit_error(&mut self, msg: impl Into, span: Span) { self.emit(LogEntry { message: msg.into(), - span, + span: Some(span), level: LogLevel::Error, }); } @@ -37,7 +45,15 @@ impl<'a> Logs<'a> { pub fn emit_warning(&mut self, msg: impl Into, span: Span) { self.emit(LogEntry { message: msg.into(), - span, + span: Some(span), + level: LogLevel::Warning, + }); + } + + pub fn emit_warning_locless(&mut self, msg: impl Into) { + self.emit(LogEntry { + message: msg.into(), + span: None, level: LogLevel::Warning, }); } @@ -45,7 +61,7 @@ impl<'a> Logs<'a> { pub fn emit_info(&mut self, msg: impl Into, span: Span) { self.emit(LogEntry { message: msg.into(), - span, + span: Some(span), level: LogLevel::Info, }); } @@ -66,7 +82,7 @@ pub enum LogLevel { pub struct LogEntry { pub message: String, - pub span: Span, + pub span: Option, pub level: LogLevel, } @@ -93,61 +109,53 @@ impl<'a> Display for LogEntryDisplay<'a> { } writeln!(f, "{}{RESET}", self.entry.message)?; - let line_start = self - .src - .get(..=self.entry.span.0) - .unwrap_or("") - .lines() - .count(); - let line_end = self - .src - .get(..self.entry.span.1) - .unwrap_or("") - .lines() - .count(); + if let Some(span) = self.entry.span { + let line_start = self.src.get(..=span.0).unwrap_or("").lines().count(); + let line_end = self.src.get(..span.1).unwrap_or("").lines().count(); - let padding = line_end.ilog10() as usize; + let padding = line_end.ilog10() as usize; - let start = self - .src - .get(..self.entry.span.0) - .and_then(|s| s.rfind('\n')) - .map(|v| v + 1) - .unwrap_or(0); + let start = self + .src + .get(..span.0) + .and_then(|s| s.rfind('\n')) + .map(|v| v + 1) + .unwrap_or(0); - let end = self - .src - .get(self.entry.span.1..) - .and_then(|s| s.find('\n')) - .map(|v| v + self.entry.span.1) - .unwrap_or(self.src.len()); + let end = self + .src + .get(span.1..) + .and_then(|s| s.find('\n')) + .map(|v| v + span.1) + .unwrap_or(self.src.len()); - let mut index = start; - for (i, line) in self.src.get(start..end).unwrap_or("").lines().enumerate() { - write!(f, "{BOLD}{CYAN}{:>padding$}: {RESET}", i + line_start)?; - for char in line.chars() { - if char == '\t' { - write!(f, " ")? - } else { - write!(f, "{char}")? + let mut index = start; + for (i, line) in self.src.get(start..end).unwrap_or("").lines().enumerate() { + write!(f, "{BOLD}{CYAN}{:>padding$}: {RESET}", i + line_start)?; + for char in line.chars() { + if char == '\t' { + write!(f, " ")? + } else { + write!(f, "{char}")? + } } - } - writeln!(f)?; - write!(f, "{BOLD}{CYAN}")?; - for _ in 0..padding + 3 { - write!(f, " ")?; - } - for char in line.chars() { - if (self.entry.span.0..self.entry.span.1).contains(&index) { - write!(f, "~")?; - } else { + writeln!(f)?; + write!(f, "{BOLD}{CYAN}")?; + for _ in 0..padding + 3 { write!(f, " ")?; } - index += char.len_utf8(); + for char in line.chars() { + if (span.0..span.1).contains(&index) { + write!(f, "~")?; + } else { + write!(f, " ")?; + } + index += char.len_utf8(); + } + write!(f, "{RESET}")?; + index += '\n'.len_utf8(); + writeln!(f)?; } - write!(f, "{RESET}")?; - index += '\n'.len_utf8(); - writeln!(f)?; } Ok(()) diff --git a/src/loader/parser.rs b/src/loader/parser.rs index 5818650..c0dccd8 100644 --- a/src/loader/parser.rs +++ b/src/loader/parser.rs @@ -6,7 +6,8 @@ use super::lexer::{Lexer, Token}; use std::iter::Peekable; pub struct Parser<'a> { - lexer: Peekable>, + lexer: Lexer<'a>, + peek: Option>>, logs: Logs<'a>, src: &'a str, eof: Span, @@ -18,11 +19,15 @@ impl<'a> Parser<'a> { eof: lexer.eof_span(), src: lexer.input(), logs: Logs::new(lexer.input()), - lexer: lexer.peekable(), + peek: None, + lexer, } } fn next_token(&mut self) -> Option>> { + if self.peek.is_some(){ + return self.peek.take() + } loop { match self.lexer.next()? { Spanned(Ok(Token::Comment(_)), _) => {} @@ -33,12 +38,10 @@ impl<'a> Parser<'a> { } fn peek_token(&mut self) -> Option>> { - loop { - match *self.lexer.peek()? { - Spanned(Ok(ok), r) => return Some(Spanned(ok, r)), - Spanned(Err(err), span) => self.logs.emit_error(format!("lexer: {err:?}"), span), - } + if self.peek.is_none(){ + self.peek = self.next_token(); } + self.peek } fn expect_token(&mut self, expected: Token<'a>) -> (bool, Span) { @@ -125,6 +128,7 @@ impl<'a> Parser<'a> { Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple), Some(Spanned(Token::LBrace | Token::LBracket, _)) => self.parse_list().map(Item::List), Some(Spanned(got, span)) => { + self.next_token(); self.logs.emit_error( format!( "unexpected token {:#}, expected {:}|{:}|{:}|{:}|{:}", diff --git a/src/main.rs b/src/main.rs index 0cfeefe..2b4b0e8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,4 +17,21 @@ fn main() { return; } }; + + let input = "aababaab"; + println!("running on: '{input}'"); + let mut simulator = npda::Simulator::begin(input, table); + loop { + match simulator.step(){ + npda::SimulatorResult::Pending => {}, + npda::SimulatorResult::Reject => { + println!("REJECTED"); + break; + }, + npda::SimulatorResult::Accept(npda) => { + println!("ACCEPT: {npda:?}"); + break; + }, + } + } }