From c06a0a014736d405fae89bbc950db380ebef9245 Mon Sep 17 00:00:00 2001 From: Parker TenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Sun, 11 Jan 2026 16:35:17 -0500 Subject: [PATCH 1/3] serde nonsense --- automata/src/automatan/pda.rs | 73 +++++++++++++++++-------------- automata/src/automatan/tm.rs | 74 +++++++++++++++++-------------- automata/src/lib.rs | 82 ++++++++++++++++++++++++++++++++++- automata/src/loader/mod.rs | 28 ++++++------ 4 files changed, 176 insertions(+), 81 deletions(-) diff --git a/automata/src/automatan/pda.rs b/automata/src/automatan/pda.rs index 5532a14..80f82d7 100644 --- a/automata/src/automatan/pda.rs +++ b/automata/src/automatan/pda.rs @@ -2,51 +2,60 @@ use std::collections::HashSet; use super::*; -use crate::{delta_lower, gamma_upper, loader::{ +use crate::{delta_lower, dual_struct_serde, gamma_upper, loader::{ Context, INITIAL_STACK, INITIAL_STATE, Spanned, ast::{self, Symbol as Sym}, log::LogSink }, sigma_upper}; -#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize))] -pub struct TransitionFrom<'a> { - pub state: State<'a>, - pub letter: Option>, - pub symbol: Symbol<'a>, +dual_struct_serde! { + #[derive(Debug, PartialEq, Eq, Clone, Hash)] + pub struct TransitionFrom<'a> { + #[serde(borrow)] + pub state: State<'a>, + #[serde(borrow)] + pub letter: Option>, + #[serde(borrow)] + pub symbol: Symbol<'a>, + } } -#[derive(Debug, PartialEq, Eq, Clone, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize))] -pub struct TransitionTo<'a> { - pub state: State<'a>, - pub stack: Vec>, +dual_struct_serde! { + #[derive(Debug, PartialEq, Eq, Clone, Hash)] + pub struct TransitionTo<'a> { + #[serde(borrow)] + pub state: State<'a>, + #[serde(borrow)] + pub stack: Vec>, - pub transition: Span, - pub function: Span, + pub transition: Span, + pub function: Span, + } } -#[derive(Clone, Debug)] -#[allow(unused)] -#[cfg_attr(feature = "serde", serde_with::serde_as)] -#[cfg_attr(feature = "serde", derive(serde::Serialize))] -pub struct Pda<'a> { - pub initial_state: State<'a>, - pub initial_stack: Symbol<'a>, - pub states: HashMap, StateInfo>, - pub symbols: HashMap, SymbolInfo>, - pub alphabet: HashMap, LetterInfo>, +dual_struct_serde! { {#[serde_with::serde_as]} + #[derive(Clone, Debug)] + pub struct Pda<'a> { + #[serde(borrow)] + pub initial_state: State<'a>, + #[serde(borrow)] + pub initial_stack: Symbol<'a>, + #[serde(borrow)] + pub states: HashMap, StateInfo>, + #[serde(borrow)] + pub symbols: HashMap, SymbolInfo>, + #[serde(borrow)] + pub alphabet: HashMap, LetterInfo>, - pub final_states: Option, StateInfo>>, + #[serde(borrow)] + pub final_states: Option, StateInfo>>, - #[cfg(feature = "serde")] - #[serde_as(as = "serde_with::Seq<(_, _)>")] - pub transitions: HashMap, HashSet>>, - - #[cfg(not(feature = "serde"))] - pub transitions: HashMap, HashSet>>, + #[serde(borrow)] + #[serde_as(as = "serde_with::Seq<(_, _)>")] + pub transitions: HashMap, HashSet>>, + } } impl<'a> Pda<'a> { - pub fn parse( + pub fn compile( items: impl Iterator>>, ctx: &mut Context<'a>, options: Options, diff --git a/automata/src/automatan/tm.rs b/automata/src/automatan/tm.rs index 179ab45..7c52244 100644 --- a/automata/src/automatan/tm.rs +++ b/automata/src/automatan/tm.rs @@ -2,57 +2,65 @@ use std::collections::HashSet; use super::*; -use crate::{delta_lower, gamma_upper, loader::{ +use crate::{delta_lower, dual_struct_serde, gamma_upper, loader::{ BLANK_SYMBOL, Context, Spanned, ast::{self, Symbol as Sym}, log::LogSink }}; - -#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize))] -pub struct TransitionFrom<'a> { - pub state: State<'a>, - pub symbol: Symbol<'a>, +dual_struct_serde! { + #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] + pub struct TransitionFrom<'a> { + #[serde(borrow)] + pub state: State<'a>, + #[serde(borrow)] + pub symbol: Symbol<'a>, + } } -#[derive(Debug, PartialEq, Eq, Clone, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize))] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum Direction { Left, Right, None, } -#[derive(Debug, PartialEq, Eq, Clone, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize))] -pub struct TransitionTo<'a> { - pub state: State<'a>, - pub symbol: Symbol<'a>, - pub direction: Direction, +dual_struct_serde! { + #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] + pub struct TransitionTo<'a> { + #[serde(borrow)] + pub state: State<'a>, + #[serde(borrow)] + pub symbol: Symbol<'a>, + pub direction: Direction, - pub transition: Span, - pub function: Span, + pub transition: Span, + pub function: Span, + } } -#[derive(Clone, Debug)] -#[allow(unused)] -#[cfg_attr(feature = "serde", serde_with::serde_as)] -#[cfg_attr(feature = "serde", derive(serde::Serialize))] -pub struct Tm<'a> { - pub initial_state: State<'a>, - pub initial_tape: Symbol<'a>, - pub states: HashMap, StateInfo>, - pub symbols: HashMap, SymbolInfo>, +dual_struct_serde! {{#[serde_with::serde_as]} + #[derive(Clone, Debug)] + pub struct Tm<'a> { + #[serde(borrow)] + pub initial_state: State<'a>, + #[serde(borrow)] + pub initial_tape: Symbol<'a>, + #[serde(borrow)] + pub states: HashMap, StateInfo>, + #[serde(borrow)] + pub symbols: HashMap, SymbolInfo>, - pub final_states: HashMap, StateInfo>, + #[serde(borrow)] + pub final_states: HashMap, StateInfo>, - #[cfg(feature = "serde")] - #[serde_as(as = "serde_with::Seq<(_, _)>")] - pub transitions: HashMap, HashSet>>, - #[cfg(not(feature = "serde"))] - pub transitions: HashMap, HashSet>>, + + #[serde(borrow)] + #[serde_as(as = "serde_with::Seq<(_, _)>")] + pub transitions: HashMap, HashSet>>, + } } impl<'a> Tm<'a> { - pub fn parse( + pub fn compile( items: impl Iterator>>, ctx: &mut Context<'a>, options: Options, diff --git a/automata/src/lib.rs b/automata/src/lib.rs index ecad2bb..f1b2f24 100644 --- a/automata/src/lib.rs +++ b/automata/src/lib.rs @@ -1,7 +1,6 @@ pub mod automatan; pub mod loader; - #[macro_export] macro_rules! dual_struct_serde { ($({$(#[$serde_specific:meta])*})? @@ -33,4 +32,83 @@ macro_rules! dual_struct_serde { ),* } }; -} \ No newline at end of file +} + +#[macro_export] +macro_rules! dual_enum_serde { + ( + $( {$(#[$serde_specific:meta])*} )? + $(#[$enum_meta:meta])* + $vis:vis enum $Name:ident $(<$($gen:tt),*>)? + { + $( + $(#[$variant_meta:meta])* + $Variant:ident + $( + // Tuple variant: Variant(T1, T2, ...) + ( $( + $(#[$tfield_meta:meta])* + $tfield_ty:ty + ),* $(,)? ) + )? + $( + // Struct variant: Variant { a: T, b: U, ... } + { $( + $(#[$sfield_meta:meta])* + $sfield_vis:vis $sfield_name:ident : $sfield_ty:ty + ),* $(,)? } + )? + ),* $(,)? + } + ) => { + #[cfg(feature = "serde")] + $(#[$enum_meta])* + #[derive(serde::Serialize, serde::Deserialize)] + $( $(#[$serde_specific])* )? + $vis enum $Name $(<$($gen),*>)? { + $( + $(#[$variant_meta])* + $Variant + $( + ( + $( + $(#[$tfield_meta])* + $tfield_ty + ),* + ) + )? + $( + { + $( + $(#[$sfield_meta])* + $sfield_vis $sfield_name: $sfield_ty + ),* + } + )? + ),* + } + + #[cfg(not(feature = "serde"))] + $(#[$enum_meta])* + $vis enum $Name $(<$($gen),*>)? { + $( + // strip variant + field attrs in non-serde version + $Variant + $( + ( + $( + $tfield_ty + ),* + ) + )? + $( + { + $( + $sfield_vis $sfield_name: $sfield_ty + ),* + } + )? + ),* + } + }; +} diff --git a/automata/src/loader/mod.rs b/automata/src/loader/mod.rs index 5379a3d..ef4546b 100644 --- a/automata/src/loader/mod.rs +++ b/automata/src/loader/mod.rs @@ -1,9 +1,8 @@ use crate::{ - automatan::*, - loader::{ + automatan::*, dual_enum_serde, dual_struct_serde, loader::{ ast::TopLevel, log::{LogEntry, LogSink}, - }, + } }; pub mod ast; @@ -120,13 +119,14 @@ impl<'a> Context<'a> { } } -#[cfg_attr(feature = "serde", derive(serde::Serialize))] -#[cfg_attr(feature = "serde", serde(tag = "type"))] -#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] -pub enum Machine<'a> { - Fa(fa::Fa<'a>), - Pda(pda::Pda<'a>), - Tm(tm::Tm<'a>), +dual_enum_serde!{ + {#[serde(tag = "type")] #[serde(rename_all = "snake_case")]} + #[derive(Clone, Debug)] + pub enum Machine<'a> { + Fa(#[serde(borrow)] fa::Fa<'a>), + Pda(#[serde(borrow)] pda::Pda<'a>), + Tm(#[serde(borrow)] tm::Tm<'a>), + } } pub fn parse_universal<'a>(ctx: &mut Context<'a>) -> Option> { @@ -194,9 +194,9 @@ pub fn parse_universal<'a>(ctx: &mut Context<'a>) -> Option> { Some(match parse_type(items.next(), ctx)? { Type::Dfa => Machine::Fa(fa::Fa::compile(items, ctx, D)?), Type::Nfa => Machine::Fa(fa::Fa::compile(items, ctx, N)?), - Type::Dpda => Machine::Pda(pda::Pda::parse(items, ctx, D)?), - Type::Npda => Machine::Pda(pda::Pda::parse(items, ctx, N)?), - Type::Tm => Machine::Tm(tm::Tm::parse(items, ctx, D)?), - Type::Ntm => Machine::Tm(tm::Tm::parse(items, ctx, N)?), + Type::Dpda => Machine::Pda(pda::Pda::compile(items, ctx, D)?), + Type::Npda => Machine::Pda(pda::Pda::compile(items, ctx, N)?), + Type::Tm => Machine::Tm(tm::Tm::compile(items, ctx, D)?), + Type::Ntm => Machine::Tm(tm::Tm::compile(items, ctx, N)?), }) } From d6e4fff7821be94e7a6dc1bd9a92263443984ad0 Mon Sep 17 00:00:00 2001 From: ParkerTenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Sun, 11 Jan 2026 21:35:39 -0500 Subject: [PATCH 2/3] improved error messages for TM and PDA's --- automata/src/automatan/fa.rs | 27 +- automata/src/automatan/mod.rs | 19 +- automata/src/automatan/pda.rs | 705 ++++++++++++++++++++-------------- automata/src/automatan/tm.rs | 547 +++++++++++++++----------- automata/src/loader/ast.rs | 28 +- automata/src/loader/lexer.rs | 5 +- automata/src/loader/log.rs | 1 - automata/src/loader/mod.rs | 10 +- automata/src/loader/parser.rs | 2 +- cli/src/main.rs | 6 +- web/root/src/automata.ts | 2 +- web_lib/src/lib.rs | 2 +- 12 files changed, 817 insertions(+), 537 deletions(-) diff --git a/automata/src/automatan/fa.rs b/automata/src/automatan/fa.rs index 66f4989..ab8cecc 100644 --- a/automata/src/automatan/fa.rs +++ b/automata/src/automatan/fa.rs @@ -3,11 +3,13 @@ use std::collections::HashSet; use super::*; use crate::{ - delta_lower, dual_struct_serde, epsilon, loader::{ + delta_lower, dual_struct_serde, epsilon, + loader::{ Context, INITIAL_STATE, Spanned, ast::{self, Symbol as Sym, TopLevel}, log::LogSink, - }, sigma_upper + }, + sigma_upper, }; dual_struct_serde! { @@ -104,6 +106,12 @@ impl<'a, 'b> FaCompiler<'a, 'b> { self.compile_top_level(element, span); } + if self.states_def.is_none() { + self.ctx + .emit_error_locless("states never defined") + .emit_help_logless("add: Q = {...}"); + } + if self.alphabet_def.is_none() { self.ctx .emit_error_locless("alphabet never defined") @@ -111,12 +119,6 @@ impl<'a, 'b> FaCompiler<'a, 'b> { .emit_info_logless(concat!("E can be ", sigma_upper!(str))); } - if self.states_def.is_none() { - self.ctx - .emit_error_locless("states never defined") - .emit_help_logless("add: Q = {...}"); - } - if self.final_states_def.is_none() { self.ctx .emit_error_locless("final states never defined") @@ -139,9 +141,12 @@ impl<'a, 'b> FaCompiler<'a, 'b> { } }; - if self.transitions.is_empty(){ - self.ctx.emit_warning_locless("no transitions defined") - .emit_help_logless("consider defining one: d(state, letter|epsilon) = state | {state, state, ...}") + if self.transitions.is_empty() { + self.ctx + .emit_warning_locless("no transitions defined") + .emit_help_logless( + "consider defining one: d(state, letter|epsilon) = state | {state, ...}", + ) .emit_info_logless(concat!("d can be ", delta_lower!(str))) .emit_info_logless(concat!("epsilon can be ", epsilon!(str))); } diff --git a/automata/src/automatan/mod.rs b/automata/src/automatan/mod.rs index bf457c6..58750a4 100644 --- a/automata/src/automatan/mod.rs +++ b/automata/src/automatan/mod.rs @@ -6,7 +6,6 @@ pub mod fa; pub mod pda; pub mod tm; - #[derive(Clone, Copy, Debug)] pub struct Options { pub non_deterministic: bool, @@ -14,15 +13,27 @@ pub struct Options { } #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize), serde(transparent))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(transparent) +)] pub struct State<'a>(pub &'a str); #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize), serde(transparent))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(transparent) +)] pub struct Symbol<'a>(pub &'a str); #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize), serde(transparent))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(transparent) +)] pub struct Letter<'a>(pub &'a str); #[derive(Clone, Debug)] diff --git a/automata/src/automatan/pda.rs b/automata/src/automatan/pda.rs index 80f82d7..5959ed9 100644 --- a/automata/src/automatan/pda.rs +++ b/automata/src/automatan/pda.rs @@ -2,9 +2,15 @@ use std::collections::HashSet; use super::*; -use crate::{delta_lower, dual_struct_serde, gamma_upper, loader::{ - Context, INITIAL_STACK, INITIAL_STATE, Spanned, ast::{self, Symbol as Sym}, log::LogSink -}, sigma_upper}; +use crate::{ + delta_lower, dual_struct_serde, epsilon, gamma_upper, + loader::{ + Context, INITIAL_STACK, INITIAL_STATE, Spanned, + ast::{self, Symbol as Sym}, + log::LogSink, + }, + sigma_upper, +}; dual_struct_serde! { #[derive(Debug, PartialEq, Eq, Clone, Hash)] @@ -54,315 +60,446 @@ dual_struct_serde! { {#[serde_with::serde_as]} } } +pub struct PdaCompiler<'a, 'b> { + ctx: &'b mut Context<'a>, + options: Options, + + initial_state: Option<(State<'a>, Span)>, + initial_stack: Option<(Symbol<'a>, Span)>, + + states: HashMap, StateInfo>, + states_def: Option, + + symbols: HashMap, SymbolInfo>, + symbols_def: Option, + + alphabet: HashMap, LetterInfo>, + alphabet_def: Option, + + final_states: HashMap, StateInfo>, + final_states_def: Option, + + transitions: HashMap, HashSet>>, +} + impl<'a> Pda<'a> { pub fn compile( items: impl Iterator>>, ctx: &mut Context<'a>, options: Options, ) -> Option> { - let mut initial_state = None; - let mut initial_stack = None; + PdaCompiler::new(ctx, options).compile(items) + } +} - let mut states = HashMap::new(); - let mut symbols = HashMap::new(); - let mut alphabet = HashMap::new(); - let mut final_states = None; +impl<'a, 'b> PdaCompiler<'a, 'b> { + pub fn new(ctx: &'b mut Context<'a>, options: Options) -> Self { + Self { + ctx, + options, - let mut transitions: HashMap, HashSet>> = - HashMap::new(); + initial_state: Default::default(), + initial_stack: Default::default(), + states: Default::default(), + states_def: Default::default(), + symbols: Default::default(), + symbols_def: Default::default(), + alphabet: Default::default(), + alphabet_def: Default::default(), + final_states: Default::default(), + final_states_def: Default::default(), + transitions: Default::default(), + } + } + pub fn compile( + mut self, + items: impl Iterator>>, + ) -> Option> { for Spanned(element, span) in items { - use Spanned as S; - use ast::TopLevel as TL; - match element { - TL::Item(S("Q", _), list) => { - if !states.is_empty() { - ctx.emit_error("states already set", span); - } - let Some(list) = list.expect_set(ctx) else { - continue; - }; - for item in list { - let Some(ident) = item.expect_ident(ctx) else { - continue; - }; - if states - .insert(State(ident), StateInfo { definition: item.1 }) - .is_some() - { - ctx.emit_error("state redefined", item.1); - } - } - - if list.is_empty() { - ctx.emit_error("states cannot be empty", span); - } - } - TL::Item(S(sigma_upper!(pat), _), list) => { - if !alphabet.is_empty() { - ctx.emit_error("alphabet already set", span); - } - let Some(list) = list.expect_set(ctx) else { - continue; - }; - for item in list { - let Some(ident) = item.expect_ident(ctx) else { - continue; - }; - - if ident.chars().count() != 1 { - ctx.emit_error("letter cannot be longer than one char", item.1); - } - - if alphabet - .insert(Letter(ident), LetterInfo { definition: item.1 }) - .is_some() - { - ctx.emit_error("letter redefined", item.1); - } - } - if list.is_empty() { - ctx.emit_error("alphabet cannot be empty", span); - } - } - TL::Item(S("F", _), list) => { - if final_states.is_some() { - ctx.emit_error("final states already set", span); - } - let mut map = HashMap::new(); - let Some(list) = list.expect_set(ctx) else { - continue; - }; - for item in list { - let Some(ident) = item.expect_ident(ctx) else { - continue; - }; - if states.contains_key(&State(ident)) { - if map - .insert(State(ident), StateInfo { definition: item.1 }) - .is_some() - { - ctx.emit_error("final state redefined", item.1); - } - } else { - ctx.emit_error("final state not defined in set of states", item.1); - } - } - final_states = Some(map); - } - TL::Item(S(gamma_upper!(pat), _), list) => { - if !symbols.is_empty() { - ctx.emit_error("stack symbols already set", span); - } - let Some(list) = list.expect_set(ctx) else { - continue; - }; - for item in list { - let Some(ident) = item.expect_ident(ctx) else { - continue; - }; - - if symbols - .insert(Symbol(ident), SymbolInfo { definition: item.1 }) - .is_some() - { - ctx.emit_error("stack symbol redefined", item.1); - } - } - - if list.is_empty() { - ctx.emit_error("stack symbols cannot be empty", span); - } - } - TL::Item(S(INITIAL_STATE, _), S(src, src_d)) => match src { - ast::Item::Symbol(Sym::Ident(ident)) => { - if initial_state.is_some() { - ctx.emit_error("initial state already set", span); - } - if states.contains_key(&State(ident)) { - initial_state = Some(State(ident)) - } else { - ctx.emit_error("initial state symbol not defined as a state", src_d); - } - } - _ => _ = ctx.emit_error("expected ident", src_d), - }, - TL::Item(S(INITIAL_STACK, _), S(src, src_d)) => match src { - ast::Item::Symbol(Sym::Ident(ident)) => { - if initial_stack.is_some() { - ctx.emit_error("initial stack already set", span); - } - if symbols.contains_key(&Symbol(ident)) { - initial_stack = Some(Symbol(ident)); - } else { - ctx.emit_error( - "initial stack symbol not defined as a stack symbol", - src_d, - ); - } - } - _ => _ = ctx.emit_error("expected ident", src_d), - }, - TL::Item(S(name, dest_s), _) => { - ctx.emit_error(format!("unknown item {name:?}, expected states, alphabet, symbols, final states, initial state, initial stack"), dest_s); - } - - TL::TransitionFunc(S((S(delta_lower!(pat), _), tuple), _), list) => { - let list = list.set_weak(); - let Some((state, letter, stack_symbol)) = - tuple.as_ref().expect_pda_transition_function(ctx) - else { - continue; - }; - if !states.contains_key(&State(state.0)) { - ctx.emit_error("transition state not defined as state", state.1); - continue; - }; - if !symbols.contains_key(&Symbol(stack_symbol.0)) { - ctx.emit_error( - "transition stack symbol not defined as stack symbol", - stack_symbol.1, - ); - continue; - }; - - let letter: Option> = match letter.0 { - Sym::Epsilon(_) => { - if !options.epsilon_moves { - ctx.emit_error("epsilon moves not permitted", letter.1); - } - None - } - Sym::Ident(val) => { - if !alphabet.contains_key(&Letter(val)) { - ctx.emit_error( - "transition letter not defined in alphabet", - letter.1, - ); - } - Some(Letter(val)) - } - }; - - for item in list { - let Some((next_state, stack)) = item - .expect_tuple(ctx) - .and_then(|item| item.expect_pda_transition(ctx)) - else { - continue; - }; - - if !states.contains_key(&State(next_state.0)) { - ctx.emit_error("transition state not defined as state", next_state.1); - continue; - }; - - let stack: Vec<_> = stack - .iter() - .rev() - .filter_map(|symbol| { - if matches!(symbol.0, ast::Item::Symbol(Sym::Epsilon(_))) { - return None; - } - let ident = symbol.expect_ident(ctx)?; - - if !symbols.contains_key(&Symbol(ident)) { - ctx.emit_error("transition stack symbol not defined", symbol.1); - return None; - }; - Some(Symbol(ident)) - }) - .collect(); - - let entry: &mut _ = transitions - .entry(TransitionFrom { - letter, - state: State(state.0), - symbol: Symbol(stack_symbol.0), - }) - .or_default(); - if !entry.is_empty() && !options.non_deterministic { - ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); - } - if !entry.insert(TransitionTo { - state: State(next_state.0), - stack, - - function: tuple.1, - transition: item.1, - }) { - ctx.emit_warning("duplicate transition", item.1); - } - } - } - TL::TransitionFunc(S((S(name, _), _), dest_s), _) => { - ctx.emit_error( - format!( - "unknown function {name:?}, expected transition function ( {} )", delta_lower!(str) - ), - dest_s, - ); - } - - TL::ProductionRule(_, _) => { - ctx.emit_error("unexpected production rule", span); - } - TL::Table() => _ = ctx.emit_error("unexpected table", span), - } + self.compile_top_level(element, span); } - if symbols.is_empty() { - ctx.emit_error_locless("stack symbols never defined"); + if self.states_def.is_none() { + self.ctx + .emit_error_locless("states never defined") + .emit_help_logless("add: Q = {...}"); } - if alphabet.is_empty() { - ctx.emit_error_locless("alphabet never defined"); + if self.alphabet_def.is_none() { + self.ctx + .emit_error_locless("alphabet never defined") + .emit_help_logless("add: E = {...}") + .emit_info_logless(concat!("E can be ", sigma_upper!(str))); } - if states.is_empty() { - ctx.emit_error_locless("states never defined"); + if self.symbols_def.is_none() { + self.ctx + .emit_error_locless("stack symbols never defined") + .emit_help_logless("add: G = {...}") + .emit_info_logless(concat!("G can be ", gamma_upper!(str))); } - let initial_stack = match initial_stack { - Some(some) => some, + // if self.final_states_def.is_none() { + // self.ctx + // .emit_error_locless("final states never defined") + // .emit_help_logless("add: F = {...}"); + // } + + let initial_state = match self.initial_state { + Some(some) => some.0, None => { - if symbols.contains_key(&Symbol("Z0")) { - ctx.emit_warning_locless( - "initial stack symbol not defined, defaulting to 'Z0'", - ); + if self.states.contains_key(&State("q0")) { + self.ctx + .emit_warning_locless("initial state not defined, defaulting to 'q0'") + .emit_help_logless(format!("add: {INITIAL_STATE} = q0")); } else { - ctx.emit_error_locless("initial stack symbol not defined"); - } - Symbol("Z0") - } - }; - - let initial_state = match initial_state { - Some(some) => some, - None => { - if states.contains_key(&State("q0")) { - ctx.emit_warning_locless("initial state not defined, defaulting to 'q0'"); - } else { - ctx.emit_error_locless("initial state not defined"); + self.ctx + .emit_error_locless("initial state not defined") + .emit_help_logless(format!("add: {INITIAL_STATE} = ...")); } State("q0") } }; - if ctx.contains_errors() { + let initial_stack = match self.initial_stack { + Some(some) => some.0, + None => { + if self.symbols.contains_key(&Symbol("Z0")) { + self.ctx + .emit_warning_locless( + "initial stack symbol not defined, defaulting to 'Z0'", + ) + .emit_help_logless(format!("add: {INITIAL_STACK} = Z0")); + } else { + self.ctx + .emit_error_locless("initial stack symbol not defined") + .emit_help_logless(format!("add: {INITIAL_STACK} = ...")); + } + Symbol("Z0") + } + }; + + if self.transitions.is_empty() { + self.ctx + .emit_warning_locless("no transitions defined") + .emit_help_logless( + "consider defining one: d(state, letter|epsilon, symbol) = (state, [symbol]) | {(state, [symbol]), ...}", + ) + .emit_info_logless(concat!("d can be ", delta_lower!(str))) + .emit_info_logless(concat!("epsilon can be ", epsilon!(str))); + } + + if self.ctx.contains_errors() { return None; } Some(Pda { initial_state, initial_stack, - states, - symbols, - alphabet, - final_states, - transitions, + states: self.states, + symbols: self.symbols, + alphabet: self.alphabet, + final_states: Some(self.final_states), + transitions: self.transitions, }) } + + fn compile_top_level(&mut self, element: ast::TopLevel<'a>, span: Span) { + use Spanned as S; + use ast::TopLevel as TL; + match element { + TL::Item(S("Q", _), list) => self.compile_states(list, span), + TL::Item(S(gamma_upper!(pat), _), list) => self.compile_symbols(list, span), + TL::Item(S(sigma_upper!(pat), _), list) => self.compile_alphabet(list, span), + TL::Item(S("F", _), list) => self.compile_final_states(list, span), + TL::Item(S(INITIAL_STATE, _), item) => self.compile_initial_state(item, span), + TL::Item(S(INITIAL_STACK, _), item) => self.compile_initial_stack(item, span), + TL::Item(S(name, dest_s), _) => { + self.ctx.emit_error(format!("unknown item {name:?}, expected states, stack symbols, alphabet, final states, initial state, initial stack"), dest_s); + } + + TL::TransitionFunc(S((S(delta_lower!(pat), _), args), _), list) => { + self.compile_transition_function(args, list) + } + TL::TransitionFunc(S((S(name, _), _), dest_s), _) => { + self.ctx.emit_error( + format!( + "unknown function {name:?}, expected transition function ( {} )", + delta_lower!(str) + ), + dest_s, + ); + } + + TL::ProductionRule(_, _) => { + self.ctx.emit_error("unexpected production rule", span); + } + TL::Table() => _ = self.ctx.emit_error("unexpected table", span), + } + } + + fn compile_states(&mut self, list: Spanned>, top_level: Span) { + if let Some(previous) = self.states_def { + self.ctx + .emit_error("states already set", top_level) + .emit_info("previously defined here", previous); + } + let Some(list) = list.expect_set(self.ctx) else { + return; + }; + for item in list { + let Some(ident) = item.expect_ident(self.ctx) else { + continue; + }; + if let Some(previous) = self + .states + .insert(State(ident), StateInfo { definition: item.1 }) + { + self.ctx + .emit_error("state redefined", item.1) + .emit_info("previously defined here", previous.definition); + } + } + + if list.is_empty() { + self.ctx.emit_error("states cannot be empty", top_level); + } + self.states_def = Some(top_level); + } + + fn compile_symbols(&mut self, list: Spanned>, top_level: Span) { + if let Some(previous) = self.symbols_def { + self.ctx + .emit_error("stack symbols already set", top_level) + .emit_info("previously defined here", previous); + } + let Some(list) = list.expect_set(self.ctx) else { + return; + }; + for item in list { + let Some(ident) = item.expect_ident(self.ctx) else { + continue; + }; + if let Some(previous) = self + .symbols + .insert(Symbol(ident), SymbolInfo { definition: item.1 }) + { + self.ctx + .emit_error("stack symbol redefined", item.1) + .emit_info("previously defined here", previous.definition); + } + } + + if list.is_empty() { + self.ctx.emit_error("states cannot be empty", top_level); + } + self.symbols_def = Some(top_level); + } + + fn compile_alphabet(&mut self, list: Spanned>, top_level: Span) { + if let Some(previous) = self.alphabet_def { + self.ctx + .emit_error("alphabet already set", top_level) + .emit_info("previously defined here", previous); + } + let Some(list) = list.expect_set(self.ctx) else { + return; + }; + for item in list { + let Some(ident) = item.expect_ident(self.ctx) else { + continue; + }; + + if ident.chars().count() != 1 { + self.ctx + .emit_error("letter cannot be longer than one char", item.1); + } + + if let Some(previous) = self + .alphabet + .insert(Letter(ident), LetterInfo { definition: item.1 }) + { + self.ctx + .emit_error("letter redefined", item.1) + .emit_help("previously defined here", previous.definition); + } + } + if list.is_empty() { + self.ctx.emit_error("alphabet cannot be empty", top_level); + } + self.alphabet_def = Some(top_level); + } + + fn compile_final_states(&mut self, list: Spanned>, top_level: Span) { + if let Some(previous) = self.final_states_def { + self.ctx + .emit_error("final states already set", top_level) + .emit_help("previously defined here", previous); + } + let Some(list) = list.expect_set(self.ctx) else { + return; + }; + for item in list { + let Some(ident) = item.expect_ident(self.ctx) else { + continue; + }; + if self.states.contains_key(&State(ident)) { + if self + .final_states + .insert(State(ident), StateInfo { definition: item.1 }) + .is_some() + { + self.ctx.emit_error("final state redefined", item.1); + } + } else { + self.ctx + .emit_error("final state not defined in set of states", item.1); + } + } + self.final_states_def = Some(top_level); + } + + fn compile_initial_state( + &mut self, + Spanned(src, src_d): Spanned>, + top_level: Span, + ) { + match src { + ast::Item::Symbol(Sym::Ident(ident)) => { + if let Some((_, previous)) = self.initial_state { + self.ctx + .emit_error("initial state already set", top_level) + .emit_help("previously defined here", previous); + } + if self.states.contains_key(&State(ident)) { + self.initial_state = Some((State(ident), top_level)) + } else { + self.ctx + .emit_error("initial state symbol not defined as a state", src_d); + } + } + _ => _ = self.ctx.emit_error("expected ident", src_d), + } + } + + fn compile_initial_stack( + &mut self, + Spanned(src, src_d): Spanned>, + top_level: Span, + ) { + match src { + ast::Item::Symbol(Sym::Ident(ident)) => { + if let Some((_, previous)) = self.initial_stack { + self.ctx + .emit_error("initial stack symbol already set", top_level) + .emit_help("previously defined here", previous); + } + if self.symbols.contains_key(&Symbol(ident)) { + self.initial_stack = Some((Symbol(ident), top_level)) + } else { + self.ctx + .emit_error("initial stack symbol not defined as a state", src_d); + } + } + _ => _ = self.ctx.emit_error("expected ident", src_d), + } + } + + fn compile_transition_function( + &mut self, + args: Spanned>, + list: Spanned>, + ) { + let list = list.set_weak(); + let Some((state, letter, stack_symbol)) = + args.as_ref().expect_pda_transition_function(self.ctx) + else { + return; + }; + if !self.states.contains_key(&State(state.0)) { + self.ctx + .emit_error("transition state not defined as state", state.1); + return; + }; + if !self.symbols.contains_key(&Symbol(stack_symbol.0)) { + self.ctx.emit_error( + "transition stack symbol not defined as stack symbol", + stack_symbol.1, + ); + return; + }; + + let letter: Option> = match letter.0 { + Sym::Epsilon(_) => { + if !self.options.epsilon_moves { + self.ctx.emit_error("epsilon moves not permitted", letter.1); + } + None + } + Sym::Ident(val) => { + if !self.alphabet.contains_key(&Letter(val)) { + self.ctx + .emit_error("transition letter not defined in alphabet", letter.1); + } + Some(Letter(val)) + } + }; + + for item in list { + let Some((next_state, stack)) = item + .expect_tuple(self.ctx) + .and_then(|item| item.expect_pda_transition(self.ctx)) + else { + continue; + }; + + if !self.states.contains_key(&State(next_state.0)) { + self.ctx + .emit_error("transition state not defined as state", next_state.1); + continue; + }; + + let stack: Vec<_> = stack + .iter() + .rev() + .filter_map(|symbol| { + if matches!(symbol.0, ast::Item::Symbol(Sym::Epsilon(_))) { + return None; + } + let ident = symbol.expect_ident(self.ctx)?; + + if !self.symbols.contains_key(&Symbol(ident)) { + self.ctx + .emit_error("transition stack symbol not defined", symbol.1); + return None; + }; + Some(Symbol(ident)) + }) + .collect(); + + let entry: &mut _ = self + .transitions + .entry(TransitionFrom { + letter, + state: State(state.0), + symbol: Symbol(stack_symbol.0), + }) + .or_default(); + if !entry.is_empty() && !self.options.non_deterministic { + self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); + } + if !entry.insert(TransitionTo { + state: State(next_state.0), + stack, + + function: args.1, + transition: item.1, + }) { + self.ctx.emit_warning("duplicate transition", item.1); + } + } + } } impl<'a, 'b> Spanned<&'b ast::Tuple<'a>> { @@ -382,10 +519,12 @@ impl<'a, 'b> Spanned<&'b ast::Tuple<'a>> { Spanned(symbol, *symbol_span), )); } - _ => _ = ctx.emit_error( - "expected PDA transition function (state, letter|epsilon, symbol)", - self.1, - ), + _ => { + _ = ctx.emit_error( + "expected PDA transition function (state, letter|epsilon, symbol)", + self.1, + ) + } } None } diff --git a/automata/src/automatan/tm.rs b/automata/src/automatan/tm.rs index 7c52244..8e9906d 100644 --- a/automata/src/automatan/tm.rs +++ b/automata/src/automatan/tm.rs @@ -2,9 +2,14 @@ use std::collections::HashSet; use super::*; -use crate::{delta_lower, dual_struct_serde, gamma_upper, loader::{ - BLANK_SYMBOL, Context, Spanned, ast::{self, Symbol as Sym}, log::LogSink -}}; +use crate::{ + delta_lower, dual_struct_serde, + loader::{ + BLANK_SYMBOL, Context, INITIAL_STATE, Spanned, + ast::{self, Symbol as Sym}, + log::LogSink, + }, +}; dual_struct_serde! { #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] pub struct TransitionFrom<'a> { @@ -43,7 +48,7 @@ dual_struct_serde! {{#[serde_with::serde_as]} #[serde(borrow)] pub initial_state: State<'a>, #[serde(borrow)] - pub initial_tape: Symbol<'a>, + pub blank_symbol: Symbol<'a>, #[serde(borrow)] pub states: HashMap, StateInfo>, #[serde(borrow)] @@ -52,7 +57,7 @@ dual_struct_serde! {{#[serde_with::serde_as]} #[serde(borrow)] pub final_states: HashMap, StateInfo>, - + #[serde(borrow)] #[serde_as(as = "serde_with::Seq<(_, _)>")] pub transitions: HashMap, HashSet>>, @@ -65,237 +70,341 @@ impl<'a> Tm<'a> { ctx: &mut Context<'a>, options: Options, ) -> Option> { - let mut initial_state = None; - let mut initial_tape = None; + TmCompiler::new(ctx, options).compile(items) + } +} - let mut states = HashMap::new(); - let mut symbols = HashMap::new(); - let mut final_states = HashMap::new(); +pub struct TmCompiler<'a, 'b> { + ctx: &'b mut Context<'a>, + options: Options, - let mut transitions: HashMap, HashSet>> = - HashMap::new(); + initial_state: Option<(State<'a>, Span)>, + blank_symbol: Option<(Symbol<'a>, Span)>, + states: HashMap, StateInfo>, + states_def: Option, + + symbols: HashMap, SymbolInfo>, + symbols_def: Option, + + final_states: HashMap, StateInfo>, + final_states_def: Option, + + transitions: HashMap, HashSet>>, +} + +impl<'a, 'b> TmCompiler<'a, 'b> { + pub fn new(ctx: &'b mut Context<'a>, options: Options) -> Self { + Self { + ctx, + options, + + initial_state: Default::default(), + blank_symbol: Default::default(), + states: Default::default(), + states_def: Default::default(), + symbols: Default::default(), + symbols_def: Default::default(), + final_states: Default::default(), + final_states_def: Default::default(), + transitions: Default::default(), + } + } + + pub fn compile( + mut self, + items: impl Iterator>>, + ) -> Option> { for Spanned(element, span) in items { - use Spanned as S; - use ast::TopLevel as TL; - match element { - TL::Item(S("Q", _), list) => { - if !states.is_empty() { - ctx.emit_error("states already set", span); - } - let Some(list) = list.expect_set(ctx) else { - continue; - }; - for item in list { - let Some(ident) = item.expect_ident(ctx) else { - continue; - }; - if states - .insert(State(ident), StateInfo { definition: item.1 }) - .is_some() - { - ctx.emit_error("state redefined", item.1); - } - } - - if list.is_empty() { - ctx.emit_error("states cannot be empty", span); - } - } - TL::Item(S("F", _), list) => { - if !final_states.is_empty() { - ctx.emit_error("final states already set", span); - } - let Some(list) = list.expect_set(ctx) else { - continue; - }; - for item in list { - let Some(ident) = item.expect_ident(ctx) else { - continue; - }; - if states.contains_key(&State(ident)) { - if final_states - .insert(State(ident), StateInfo { definition: item.1 }) - .is_none() - { - ctx.emit_error("final state redefined", item.1); - } - } else { - ctx.emit_error("final state not defined in set of states", item.1); - } - } - } - TL::Item(S(gamma_upper!(pat), _), list) => { - if !symbols.is_empty() { - ctx.emit_error("tape symbols already set", span); - } - let Some(list) = list.expect_set(ctx) else { - continue; - }; - for item in list { - let Some(ident) = item.expect_ident(ctx) else { - continue; - }; - - if symbols - .insert(Symbol(ident), SymbolInfo { definition: item.1 }) - .is_some() - { - ctx.emit_error("tape symbol redefined", item.1); - } - } - - if list.is_empty() { - ctx.emit_error("tape symbols cannot be empty", span); - } - } - TL::Item(S("q0", _), S(src, src_d)) => match src { - ast::Item::Symbol(Sym::Ident(ident)) => { - if initial_state.is_some() { - ctx.emit_error("initial state already set", span); - } - if states.contains_key(&State(ident)) { - initial_state = Some(State(ident)) - } else { - ctx.emit_error("initial state symbol not defined as a state", src_d); - } - } - _ => _ = ctx.emit_error("expected ident", src_d), - }, - TL::Item(S(BLANK_SYMBOL, _), S(src, src_d)) => match src { - ast::Item::Symbol(Sym::Ident(ident)) => { - if initial_tape.is_some() { - ctx.emit_error("initial tape symbol already set", span); - } - if symbols.contains_key(&Symbol(ident)) { - initial_tape = Some(Symbol(ident)); - } else { - ctx.emit_error( - "initial tape symbol not defined as a tape symbol", - src_d, - ); - } - } - _ => _ = ctx.emit_error("expected ident", src_d), - }, - TL::Item(S(name, dest_s), _) => { - ctx.emit_error(format!("unknown item {name:?}, expected states, symbols, final states, initial state, blank symbol"), dest_s); - } - - TL::TransitionFunc(S((S(delta_lower!(pat), _), tuple), _), list) => { - let list = list.set_weak(); - let Some((from_state, from_tape)) = - tuple.as_ref().expect_tm_transition_function(ctx) - else { - continue; - }; - if !states.contains_key(&State(from_state.0)) { - ctx.emit_error("transition state not defined as state", from_state.1); - continue; - }; - if !symbols.contains_key(&Symbol(from_tape.0)) { - ctx.emit_error( - "transition tape symbol not defined as tape symbol", - from_tape.1, - ); - continue; - }; - - for item in list { - let Some((to_state, to_tape, direction)) = item - .expect_tuple(ctx) - .and_then(|item| item.expect_tm_transition(ctx)) - else { - continue; - }; - - if !states.contains_key(&State(to_state.0)) { - ctx.emit_error("transition state not defined as state", to_state.1); - continue; - }; - - let entry: &mut _ = transitions - .entry(TransitionFrom { - state: State(from_state.0), - symbol: Symbol(from_tape.0), - }) - .or_default(); - if !entry.is_empty() && !options.non_deterministic { - ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); - } - if !entry.insert(TransitionTo { - state: State(to_state.0), - symbol: Symbol(to_tape.0), - direction: direction.0, - - function: tuple.1, - transition: item.1, - }) { - ctx.emit_warning("duplicate transition", item.1); - } - } - } - TL::TransitionFunc(S((S(name, _), _), dest_s), _) => { - ctx.emit_error( - format!( - "unknown function {name:?}, expected transition function ( {} )", delta_lower!(str) - ), - dest_s, - ); - } - - TL::ProductionRule(_, _) => { - ctx.emit_error("unexpected production rule", span); - } - TL::Table() => _ = ctx.emit_error("unexpected table", span), - } + self.compile_top_level(element, span); } - if symbols.is_empty() { - ctx.emit_error_locless("tape symbols never defined"); + if self.final_states_def.is_none() { + self.ctx + .emit_error_locless("final states never defined") + .emit_help_logless("add: F = {...}"); } - if states.is_empty() { - ctx.emit_error_locless("states never defined"); - } - - let initial_tape = match initial_tape { - Some(some) => some, + let initial_state = match self.initial_state { + Some(some) => some.0, None => { - if symbols.contains_key(&Symbol("z0")) { - ctx.emit_warning_locless("initial tape symbol not defined, defaulting to 'z0'"); + if self.states.contains_key(&State("q0")) { + self.ctx + .emit_warning_locless("initial state not defined, defaulting to 'q0'") + .emit_help_logless(format!("add: {INITIAL_STATE} = q0")); } else { - ctx.emit_error_locless("initial tape symbol not defined"); - } - Symbol("z0") - } - }; - - let initial_state = match initial_state { - Some(some) => some, - None => { - if states.contains_key(&State("q0")) { - ctx.emit_warning_locless("initial state not defined, defaulting to 'q0'"); - } else { - ctx.emit_error_locless("initial state not defined"); + self.ctx + .emit_error_locless("initial state not defined") + .emit_help_logless(format!("add: {BLANK_SYMBOL} = ...")); } State("q0") } }; - if ctx.contains_errors() { + let blank_symbol = match self.blank_symbol { + Some(some) => some.0, + None => { + if self.symbols.contains_key(&Symbol("B")) { + self.ctx + .emit_warning_locless("blank symbol not defined, defaulting to 'B'") + .emit_help_logless(format!("add: {BLANK_SYMBOL} = B")); + } else { + self.ctx + .emit_error_locless("blank symbol not defined") + .emit_help_logless(format!("add: {BLANK_SYMBOL} = ...")); + } + Symbol("B") + } + }; + + if self.transitions.is_empty() { + self.ctx + .emit_warning_locless("no transitions defined") + .emit_help_logless( + "consider defining one: d(state, symbol) = (state, symbol, direction) | {(state, symbol, direction), ...}", + ) + .emit_info_logless(concat!("d can be ", delta_lower!(str))); + } + + if self.ctx.contains_errors() { return None; } Some(Tm { initial_state, - initial_tape, - states, - symbols, - final_states, - transitions, + blank_symbol, + states: self.states, + symbols: self.symbols, + final_states: self.final_states, + transitions: self.transitions, }) } + + fn compile_top_level(&mut self, element: ast::TopLevel<'a>, span: Span) { + use Spanned as S; + use ast::TopLevel as TL; + match element { + TL::Item(S("Q", _), list) => self.compile_states(list, span), + TL::Item(S(delta_lower!(pat), _), list) => self.compile_symbols(list, span), + TL::Item(S("F", _), list) => self.compile_final_states(list, span), + TL::Item(S(INITIAL_STATE, _), item) => self.compile_initial_state(item, span), + TL::Item(S(BLANK_SYMBOL, _), item) => self.compile_blank_symbol(item, span), + TL::Item(S(name, dest_s), _) => { + self.ctx.emit_error(format!("unknown item {name:?}, expected states, symbols, final states, initial state, blank symbol"), dest_s); + } + + TL::TransitionFunc(S((S(delta_lower!(pat), _), args), _), list) => { + self.compile_transition_function(args, list) + } + TL::TransitionFunc(S((S(name, _), _), dest_s), _) => { + self.ctx.emit_error( + format!( + "unknown function {name:?}, expected transition function ( {} )", + delta_lower!(str) + ), + dest_s, + ); + } + + TL::ProductionRule(_, _) => { + self.ctx.emit_error("unexpected production rule", span); + } + TL::Table() => _ = self.ctx.emit_error("unexpected table", span), + } + } + + fn compile_states(&mut self, list: Spanned>, top_level: Span) { + if let Some(previous) = self.states_def { + self.ctx + .emit_error("states already set", top_level) + .emit_info("previously defined here", previous); + } + let Some(list) = list.expect_set(self.ctx) else { + return; + }; + for item in list { + let Some(ident) = item.expect_ident(self.ctx) else { + continue; + }; + if let Some(previous) = self + .states + .insert(State(ident), StateInfo { definition: item.1 }) + { + self.ctx + .emit_error("state redefined", item.1) + .emit_info("previously defined here", previous.definition); + } + } + + if list.is_empty() { + self.ctx.emit_error("states cannot be empty", top_level); + } + self.states_def = Some(top_level); + } + + fn compile_symbols(&mut self, list: Spanned>, top_level: Span) { + if let Some(previous) = self.symbols_def { + self.ctx + .emit_error("stack symbols already set", top_level) + .emit_info("previously defined here", previous); + } + let Some(list) = list.expect_set(self.ctx) else { + return; + }; + for item in list { + let Some(ident) = item.expect_ident(self.ctx) else { + continue; + }; + if let Some(previous) = self + .symbols + .insert(Symbol(ident), SymbolInfo { definition: item.1 }) + { + self.ctx + .emit_error("stack symbol redefined", item.1) + .emit_info("previously defined here", previous.definition); + } + } + + if list.is_empty() { + self.ctx.emit_error("states cannot be empty", top_level); + } + self.symbols_def = Some(top_level); + } + + fn compile_final_states(&mut self, list: Spanned>, top_level: Span) { + if let Some(previous) = self.final_states_def { + self.ctx + .emit_error("final states already set", top_level) + .emit_help("previously defined here", previous); + } + let Some(list) = list.expect_set(self.ctx) else { + return; + }; + for item in list { + let Some(ident) = item.expect_ident(self.ctx) else { + continue; + }; + if self.states.contains_key(&State(ident)) { + if self + .final_states + .insert(State(ident), StateInfo { definition: item.1 }) + .is_some() + { + self.ctx.emit_error("final state redefined", item.1); + } + } else { + self.ctx + .emit_error("final state not defined in set of states", item.1); + } + } + self.final_states_def = Some(top_level); + } + + fn compile_initial_state( + &mut self, + Spanned(src, src_d): Spanned>, + top_level: Span, + ) { + match src { + ast::Item::Symbol(Sym::Ident(ident)) => { + if let Some((_, previous)) = self.initial_state { + self.ctx + .emit_error("initial state already set", top_level) + .emit_help("previously defined here", previous); + } + if self.states.contains_key(&State(ident)) { + self.initial_state = Some((State(ident), top_level)) + } else { + self.ctx + .emit_error("initial state symbol not defined as a state", src_d); + } + } + _ => _ = self.ctx.emit_error("expected ident", src_d), + } + } + + fn compile_blank_symbol( + &mut self, + Spanned(src, src_d): Spanned>, + top_level: Span, + ) { + match src { + ast::Item::Symbol(Sym::Ident(ident)) => { + if let Some((_, previous)) = self.blank_symbol { + self.ctx + .emit_error("blank symbol already set", top_level) + .emit_help("previously defined here", previous); + } + if self.states.contains_key(&State(ident)) { + self.blank_symbol = Some((Symbol(ident), top_level)) + } else { + self.ctx + .emit_error("blank symbol not defined as a state", src_d); + } + } + _ => _ = self.ctx.emit_error("expected ident", src_d), + } + } + + fn compile_transition_function( + &mut self, + args: Spanned>, + list: Spanned>, + ) { + let list = list.set_weak(); + let Some((from_state, from_tape)) = args.as_ref().expect_tm_transition_function(self.ctx) + else { + return; + }; + if !self.states.contains_key(&State(from_state.0)) { + self.ctx + .emit_error("transition state not defined as state", from_state.1); + return; + }; + if !self.symbols.contains_key(&Symbol(from_tape.0)) { + self.ctx.emit_error( + "transition tape symbol not defined as tape symbol", + from_tape.1, + ); + return; + }; + + for item in list { + let Some((to_state, to_tape, direction)) = item + .expect_tuple(self.ctx) + .and_then(|item| item.expect_tm_transition(self.ctx)) + else { + continue; + }; + + if !self.states.contains_key(&State(to_state.0)) { + self.ctx + .emit_error("transition state not defined as state", to_state.1); + continue; + }; + + let entry: &mut _ = self + .transitions + .entry(TransitionFrom { + state: State(from_state.0), + symbol: Symbol(from_tape.0), + }) + .or_default(); + if !entry.is_empty() && !self.options.non_deterministic { + self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); + } + if !entry.insert(TransitionTo { + state: State(to_state.0), + symbol: Symbol(to_tape.0), + direction: direction.0, + + function: args.1, + transition: item.1, + }) { + self.ctx.emit_warning("duplicate transition", item.1); + } + } + } } impl<'a> Spanned<&ast::Tuple<'a>> { @@ -343,10 +452,12 @@ impl<'a> Spanned<&ast::Tuple<'a>> { Spanned(direction, *direction_span), )); } - _ => _ = ctx.emit_error( - "expected TM transition function (state, symbol, direction)", - self.1, - ), + _ => { + _ = ctx.emit_error( + "expected TM transition function (state, symbol, direction)", + self.1, + ) + } } None } diff --git a/automata/src/loader/ast.rs b/automata/src/loader/ast.rs index 4db3aca..a70ed32 100644 --- a/automata/src/loader/ast.rs +++ b/automata/src/loader/ast.rs @@ -77,7 +77,9 @@ impl<'a> Spanned> { pub fn expect_ident(&self, ctx: &mut Context<'a>) -> Option<&'a str> { match &self.0 { Item::Symbol(Symbol::Ident(ident)) => return Some(ident), - Item::Symbol(Symbol::Epsilon(_)) => _ = ctx.emit_error("expected ident found epsilon", self.1), + Item::Symbol(Symbol::Epsilon(_)) => { + _ = ctx.emit_error("expected ident found epsilon", self.1) + } Item::Tuple(_) => _ = ctx.emit_error("expected ident found tuple", self.1), Item::List(_) => _ = ctx.emit_error("expected ident found list", self.1), } @@ -86,8 +88,12 @@ impl<'a> Spanned> { pub fn expect_set(&self, ctx: &mut Context<'a>) -> Option<&[Spanned>]> { match &self.0 { - Item::Symbol(Symbol::Ident(_)) => _ = ctx.emit_error("expected set found ident", self.1), - Item::Symbol(Symbol::Epsilon(_)) => _ = ctx.emit_error("expected set found epsilon", self.1), + Item::Symbol(Symbol::Ident(_)) => { + _ = ctx.emit_error("expected set found ident", self.1) + } + Item::Symbol(Symbol::Epsilon(_)) => { + _ = ctx.emit_error("expected set found epsilon", self.1) + } Item::Tuple(_) => _ = ctx.emit_error("expected set found tuple", self.1), Item::List(list) => return Some(&list.0), } @@ -96,8 +102,12 @@ impl<'a> Spanned> { pub fn expect_list(&self, ctx: &mut Context<'a>) -> Option<&[Spanned>]> { match &self.0 { - Item::Symbol(Symbol::Ident(_)) => _ = ctx.emit_error("expected list found ident", self.1), - Item::Symbol(Symbol::Epsilon(_)) => _ = ctx.emit_error("expected list found epsilon", self.1), + Item::Symbol(Symbol::Ident(_)) => { + _ = ctx.emit_error("expected list found ident", self.1) + } + Item::Symbol(Symbol::Epsilon(_)) => { + _ = ctx.emit_error("expected list found epsilon", self.1) + } Item::Tuple(_) => _ = ctx.emit_error("expected list found tuple", self.1), Item::List(list) => return Some(&list.0), } @@ -120,8 +130,12 @@ impl<'a> Spanned> { pub fn expect_tuple(&self, ctx: &mut Context<'a>) -> Option>> { match &self.0 { - Item::Symbol(Symbol::Ident(_)) => _ = ctx.emit_error("expected tuple found ident", self.1), - Item::Symbol(Symbol::Epsilon(_)) => _ = ctx.emit_error("expected tuple found epsilon", self.1), + Item::Symbol(Symbol::Ident(_)) => { + _ = ctx.emit_error("expected tuple found ident", self.1) + } + Item::Symbol(Symbol::Epsilon(_)) => { + _ = ctx.emit_error("expected tuple found epsilon", self.1) + } Item::Tuple(tuple) => return Some(Spanned(tuple, self.1)), Item::List(_) => _ = ctx.emit_error("expected tuple found list", self.1), } diff --git a/automata/src/loader/lexer.rs b/automata/src/loader/lexer.rs index 183ecde..c4bc999 100644 --- a/automata/src/loader/lexer.rs +++ b/automata/src/loader/lexer.rs @@ -96,7 +96,10 @@ fn begin_ident(c: char) -> bool { } fn continue_ident(c: char) -> bool { - c.is_alphanumeric() || c == '_' || c=='\'' || (!c.is_ascii() && !c.is_control() && !c.is_whitespace()) + c.is_alphanumeric() + || c == '_' + || c == '\'' + || (!c.is_ascii() && !c.is_control() && !c.is_whitespace()) } impl<'a> std::iter::Iterator for Lexer<'a> { diff --git a/automata/src/loader/log.rs b/automata/src/loader/log.rs index 084047c..4766072 100644 --- a/automata/src/loader/log.rs +++ b/automata/src/loader/log.rs @@ -2,7 +2,6 @@ use std::fmt::Display; use crate::loader::Span; - #[cfg_attr(feature = "serde", derive(serde::Serialize))] pub struct Logs { logs: Vec, diff --git a/automata/src/loader/mod.rs b/automata/src/loader/mod.rs index ef4546b..88fbb12 100644 --- a/automata/src/loader/mod.rs +++ b/automata/src/loader/mod.rs @@ -1,8 +1,10 @@ use crate::{ - automatan::*, dual_enum_serde, dual_struct_serde, loader::{ + automatan::*, + dual_enum_serde, + loader::{ ast::TopLevel, log::{LogEntry, LogSink}, - } + }, }; pub mod ast; @@ -119,8 +121,8 @@ impl<'a> Context<'a> { } } -dual_enum_serde!{ - {#[serde(tag = "type")] #[serde(rename_all = "snake_case")]} +dual_enum_serde! { + {#[serde(tag = "type")] #[serde(rename_all = "snake_case")]} #[derive(Clone, Debug)] pub enum Machine<'a> { Fa(#[serde(borrow)] fa::Fa<'a>), diff --git a/automata/src/loader/parser.rs b/automata/src/loader/parser.rs index d03a5c9..2426339 100644 --- a/automata/src/loader/parser.rs +++ b/automata/src/loader/parser.rs @@ -93,7 +93,7 @@ impl<'a, 'b> Parser<'a, 'b> { fn parse_as_symbol(&mut self, tok: S>) -> S> { match tok { S(T::Tilde, r) => S(Symbol::Epsilon("~"), r), - S(T::Ident(repr@ epsilon!(pat)), r) => S(Symbol::Epsilon(repr), r), + S(T::Ident(repr @ epsilon!(pat)), r) => S(Symbol::Epsilon(repr), r), S(T::Ident(ident), r) => S(Symbol::Ident(ident), r), S(got, span) => { self.ctx.emit_error( diff --git a/cli/src/main.rs b/cli/src/main.rs index 496680a..da0f5d9 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,5 +1 @@ - - -pub fn main(){ - -} \ No newline at end of file +pub fn main() {} diff --git a/web/root/src/automata.ts b/web/root/src/automata.ts index f1b8a12..6577551 100644 --- a/web/root/src/automata.ts +++ b/web/root/src/automata.ts @@ -189,7 +189,7 @@ export type Tm = { type: "tm"; initial_state: State; - initial_tape: Symbol; + blank_symbol: Symbol; states: Map; symbols: Map; alphabet: Map; diff --git a/web_lib/src/lib.rs b/web_lib/src/lib.rs index 75c4099..f38855d 100644 --- a/web_lib/src/lib.rs +++ b/web_lib/src/lib.rs @@ -94,7 +94,7 @@ pub fn lex(input: &str) -> Vec { } // ugly hack to keep single ascii letters non keyworded for user - Token::Ident(ident) if ident.is_ascii() && ident.len()==1 => Kind::Ident, + Token::Ident(ident) if ident.is_ascii() && ident.len() == 1 => Kind::Ident, Token::Ident( epsilon!(pat) | delta_lower!(pat) | sigma_upper!(pat) | gamma_upper!(pat), ) => Kind::Keyword, From ba996ee942a22e097a55a9d602ee07d35d722f76 Mon Sep 17 00:00:00 2001 From: ParkerTenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Sun, 11 Jan 2026 22:20:42 -0500 Subject: [PATCH 3/3] added explicit accept by for PDAs --- automata/src/automatan/fa.rs | 2 +- automata/src/automatan/pda.rs | 81 ++++++++++++++++++++++++++++++++--- automata/src/automatan/tm.rs | 2 +- web/root/src/examples.ts | 23 +++++----- 4 files changed, 89 insertions(+), 19 deletions(-) diff --git a/automata/src/automatan/fa.rs b/automata/src/automatan/fa.rs index ab8cecc..ff7399b 100644 --- a/automata/src/automatan/fa.rs +++ b/automata/src/automatan/fa.rs @@ -173,7 +173,7 @@ impl<'a, 'b> FaCompiler<'a, 'b> { TL::Item(S("F", _), list) => self.compile_final_states(list, span), TL::Item(S(INITIAL_STATE, _), item) => self.compile_initial_state(item, span), TL::Item(S(name, dest_s), _) => { - self.ctx.emit_error(format!("unknown item {name:?}, expected states, alphabet, final states, initial state"), dest_s); + self.ctx.emit_error(format!("unknown item {name:?}, expected states | alphabet | final states | initial state"), dest_s); } TL::TransitionFunc(S((S(delta_lower!(pat), _), args), _), list) => { diff --git a/automata/src/automatan/pda.rs b/automata/src/automatan/pda.rs index 5959ed9..e3c2c02 100644 --- a/automata/src/automatan/pda.rs +++ b/automata/src/automatan/pda.rs @@ -60,12 +60,19 @@ dual_struct_serde! { {#[serde_with::serde_as]} } } +#[derive(Clone, Copy)] +enum AcceptBy { + EmptyStack, + FinalState, +} + pub struct PdaCompiler<'a, 'b> { ctx: &'b mut Context<'a>, options: Options, initial_state: Option<(State<'a>, Span)>, initial_stack: Option<(Symbol<'a>, Span)>, + accept_by: Option<(AcceptBy, Span)>, states: HashMap, StateInfo>, states_def: Option, @@ -92,6 +99,18 @@ impl<'a> Pda<'a> { } } +macro_rules! accept_empty { + ($ident: ident) => { + $crate::maker!($ident: "N","n","null","empty","E","Z0","z0") + }; +} + +macro_rules! accept_final { + ($ident: ident) => { + $crate::maker!($ident: "F","final") + }; +} + impl<'a, 'b> PdaCompiler<'a, 'b> { pub fn new(ctx: &'b mut Context<'a>, options: Options) -> Self { Self { @@ -100,6 +119,7 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { initial_state: Default::default(), initial_stack: Default::default(), + accept_by: Default::default(), states: Default::default(), states_def: Default::default(), symbols: Default::default(), @@ -140,11 +160,32 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { .emit_info_logless(concat!("G can be ", gamma_upper!(str))); } - // if self.final_states_def.is_none() { - // self.ctx - // .emit_error_locless("final states never defined") - // .emit_help_logless("add: F = {...}"); - // } + if self.accept_by.is_none() { + self.ctx + .emit_error_locless("accept by never defined") + .emit_help_logless("add: accept = N|F") + .emit_info_logless(concat!( + "accept by empty stack N can be ", + accept_empty!(str) + )) + .emit_info_logless(concat!( + "accept by final state F can be ", + accept_final!(str) + )); + } + + if self.final_states_def.is_none() + && matches!(self.accept_by, Some((AcceptBy::FinalState, _))) + { + self.ctx + .emit_error_locless("final states never defined") + .emit_help_logless("add: F = {...}"); + }else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) = (self.accept_by, self.final_states_def){ + self.ctx + .emit_error_locless("final states defined alongside accept by empty stack") + .emit_help("either remote to accept by empty stack", states) + .emit_help("or remote to accept by final state", empty); + } let initial_state = match self.initial_state { Some(some) => some.0, @@ -194,13 +235,16 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { return None; } + let final_states = + matches!(self.accept_by, Some((AcceptBy::FinalState, _))).then_some(self.final_states); + Some(Pda { initial_state, initial_stack, states: self.states, symbols: self.symbols, alphabet: self.alphabet, - final_states: Some(self.final_states), + final_states, transitions: self.transitions, }) } @@ -209,6 +253,7 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { use Spanned as S; use ast::TopLevel as TL; match element { + TL::Item(S("accept", _), item) => self.compile_accept_by(item, span), TL::Item(S("Q", _), list) => self.compile_states(list, span), TL::Item(S(gamma_upper!(pat), _), list) => self.compile_symbols(list, span), TL::Item(S(sigma_upper!(pat), _), list) => self.compile_alphabet(list, span), @@ -216,7 +261,7 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { TL::Item(S(INITIAL_STATE, _), item) => self.compile_initial_state(item, span), TL::Item(S(INITIAL_STACK, _), item) => self.compile_initial_stack(item, span), TL::Item(S(name, dest_s), _) => { - self.ctx.emit_error(format!("unknown item {name:?}, expected states, stack symbols, alphabet, final states, initial state, initial stack"), dest_s); + self.ctx.emit_error(format!("unknown item {name:?}, expected states | stack symbols | alphabet | accept by | final states | initial state | initial stack"), dest_s); } TL::TransitionFunc(S((S(delta_lower!(pat), _), args), _), list) => { @@ -239,6 +284,28 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { } } + fn compile_accept_by(&mut self, item: Spanned>, top_level: Span) { + if let Some((_, previous)) = self.accept_by { + self.ctx + .emit_error("accept by already set", top_level) + .emit_info("previously defined here", previous); + } + let Some(by) = item.expect_ident(self.ctx) else { + return; + }; + + let by = match by { + accept_empty!(pat) => AcceptBy::EmptyStack, + accept_final!(pat) => AcceptBy::FinalState, + _ => { + self.ctx.emit_error("invalid accept by", item.1); + return; + } + }; + + self.accept_by = Some((by, top_level)); + } + fn compile_states(&mut self, list: Spanned>, top_level: Span) { if let Some(previous) = self.states_def { self.ctx diff --git a/automata/src/automatan/tm.rs b/automata/src/automatan/tm.rs index 8e9906d..adc57f4 100644 --- a/automata/src/automatan/tm.rs +++ b/automata/src/automatan/tm.rs @@ -190,7 +190,7 @@ impl<'a, 'b> TmCompiler<'a, 'b> { TL::Item(S(INITIAL_STATE, _), item) => self.compile_initial_state(item, span), TL::Item(S(BLANK_SYMBOL, _), item) => self.compile_blank_symbol(item, span), TL::Item(S(name, dest_s), _) => { - self.ctx.emit_error(format!("unknown item {name:?}, expected states, symbols, final states, initial state, blank symbol"), dest_s); + self.ctx.emit_error(format!("unknown item {name:?}, expected states | symbols | final states | initial state | blank symbol"), dest_s); } TL::TransitionFunc(S((S(delta_lower!(pat), _), args), _), list) => { diff --git a/web/root/src/examples.ts b/web/root/src/examples.ts index 6745845..c3cd7e6 100644 --- a/web/root/src/examples.ts +++ b/web/root/src/examples.ts @@ -27,11 +27,11 @@ export const examples: readonly Example[] = [ "DFA", `// strings over a,b which start and end with different letters -type = DFA // type of machine DFA, NFA, DPDA, NPDA, DTM, NTM -Q = {q0, qa, qa', qb, qb'} // set of states -E = {a, b} // alphabet -F = {qa', qb'} // set of final states -q0 = q0 // initial state +type = DFA // type of machine DFA, NFA, DPDA, NPDA, DTM, NTM +Q = {q0, qa, qa', qb, qb'} // set of states +E = {a, b} // alphabet +F = {qa', qb'} // set of final states +q0 = q0 // initial state // transition function (state, letter) -> state d(q0, a) = qa @@ -81,11 +81,12 @@ d(q4, 3) = q2`, new Example( "DPDA", "unequal", - `type=DPDA -Q = {q0, qas, qeq, qmb, qlb} // states -E = {a, b} // alphabet -T = {z0, A} // stack -F = {qmb, qlb} // final states + `type = DPDA +Q = {q0, qas, qeq, qmb, qlb} // states +E = {a, b} // alphabet +T = {z0, A} // stack +F = {qmb, qlb} // final states +accept = F // accept by final state q0 = q0 z0 = z0 @@ -112,6 +113,7 @@ d(qmb, b, z0) = (qmb, z0)`, Q = {q0, q1} // states E = {a, b} // alphabet T = {z0, A, B} // stack +accept = E // accept by empty stack q0 = q0 z0 = z0 @@ -142,6 +144,7 @@ d(q1, b, B) = { (q1, epsilon) }`, Q = {q0, q1} // states E = {a, b} // alphabet T = {z0, A, B} // stack +accept = E // accept by empty stack q0 = q0 z0 = z0