Merge branch 'main' into gh-pages

This commit is contained in:
ParkerTenBroeck 2026-01-14 15:03:14 -05:00
commit 64bbee9524
9 changed files with 270 additions and 84 deletions

17
Cargo.lock generated
View file

@ -23,6 +23,8 @@ version = "0.1.0"
dependencies = [
"serde",
"serde_with",
"unicode-display-width",
"unicode-segmentation",
]
[[package]]
@ -494,12 +496,27 @@ dependencies = [
"time-core",
]
[[package]]
name = "unicode-display-width"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a43273b656140aa2bb8e65351fe87c255f0eca706b2538a9bd4a590a3490bf3"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "wasm-bindgen"
version = "0.2.106"

View file

@ -6,6 +6,8 @@ edition = "2024"
[dependencies]
serde = { version = "1.0", features = ["derive"], optional = true}
serde_with = { version = "3.0", features = ["default"], optional = true}
unicode-display-width = "*"
unicode-segmentation = "*"
[features]
default = []

View file

@ -1,5 +1,3 @@
use std::collections::HashSet;
use super::*;
use crate::{
@ -32,6 +30,16 @@ dual_struct_serde! {
}
}
/// Destination half of an `Fa` transition.
///
/// Used as the key of the inner map in `FaCompiler::transitions`, so two
/// definitions from the same source that target the same state collide there.
#[derive(Hash, Clone, Copy, PartialEq, Eq)]
struct Transition<'a> {
/// State the transition moves to.
pub state: State<'a>,
}
/// Source locations recorded for one transition.
///
/// Stored as the value next to its `Transition` key and copied into
/// `TransitionTo` when the final `Fa` is built.
struct TransitionInfo {
/// Span reported as "previously defined here" in duplicate diagnostics.
pub transition: Span,
// NOTE(review): presumably the span of the enclosing transition-function item — confirm.
pub function: Span,
}
dual_struct_serde! { {#[serde_with::serde_as]}
#[derive(Clone, Debug)]
pub struct Fa<'a> {
@ -49,7 +57,7 @@ dual_struct_serde! { {#[serde_with::serde_as]}
#[serde(borrow)]
#[serde_as(as = "serde_with::Seq<(_, _)>")]
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
}
}
@ -78,7 +86,7 @@ pub struct FaCompiler<'a, 'b> {
final_states: HashMap<State<'a>, StateInfo>,
final_states_def: Option<Span>,
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
}
impl<'a, 'b> FaCompiler<'a, 'b> {
@ -160,7 +168,22 @@ impl<'a, 'b> FaCompiler<'a, 'b> {
states: self.states,
alphabet: self.alphabet,
final_states: self.final_states,
transitions: self.transitions,
transitions: self
.transitions
.into_iter()
.map(|(k, v)| {
(
k,
v.into_iter()
.map(|(k, v)| TransitionTo {
function: v.function,
state: k.state,
transition: v.transition,
})
.collect(),
)
})
.collect(),
})
}
@ -365,13 +388,17 @@ impl<'a, 'b> FaCompiler<'a, 'b> {
&& !self.options.non_deterministic
{
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
.emit_info("previously defined here", entry.transition);
.emit_info("previously defined here", entry.1.transition);
}
if let Some(previous) = entry.replace(TransitionTo {
if let Some(previous) = entry.insert(
Transition {
state: State(next_state.0),
},
TransitionInfo {
function,
transition: item.1,
}) {
},
) {
self.ctx
.emit_warning("duplicate transition", item.1)
.emit_info("previously defined here", previous.transition);

View file

@ -1,5 +1,3 @@
use std::collections::HashSet;
use super::*;
use crate::{
@ -37,6 +35,17 @@ dual_struct_serde! {
}
}
/// Outcome half of a `Pda` transition.
///
/// Used as the key of the inner map in `PdaCompiler::transitions`. Derives
/// `Clone` but not `Copy` because `stack` is a `Vec`.
#[derive(Hash, Clone, PartialEq, Eq)]
struct Transition<'a> {
/// State the transition moves to.
pub state: State<'a>,
// NOTE(review): presumably the symbols that replace the consumed stack symbol — confirm.
pub stack: Vec<Symbol<'a>>,
}
/// Source locations recorded for one transition.
///
/// Stored as the value next to its `Transition` key and copied into
/// `TransitionTo` when the final `Pda` is built.
struct TransitionInfo {
/// Span reported as "previously defined here" in duplicate diagnostics.
pub transition: Span,
// NOTE(review): presumably the span of the enclosing transition-function item — confirm.
pub function: Span,
}
dual_struct_serde! { {#[serde_with::serde_as]}
#[derive(Clone, Debug)]
pub struct Pda<'a> {
@ -56,7 +65,7 @@ dual_struct_serde! { {#[serde_with::serde_as]}
#[serde(borrow)]
#[serde_as(as = "serde_with::Seq<(_, _)>")]
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
}
}
@ -86,7 +95,7 @@ pub struct PdaCompiler<'a, 'b> {
final_states: HashMap<State<'a>, StateInfo>,
final_states_def: Option<Span>,
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
}
impl<'a> Pda<'a> {
@ -180,7 +189,9 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
self.ctx
.emit_error_locless("final states never defined")
.emit_help_logless("add: F = {...}");
}else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) = (self.accept_by, self.final_states_def){
} else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) =
(self.accept_by, self.final_states_def)
{
self.ctx
.emit_error_locless("final states defined alongside accept by empty stack")
.emit_help("either remote to accept by empty stack", states)
@ -245,7 +256,23 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
symbols: self.symbols,
alphabet: self.alphabet,
final_states,
transitions: self.transitions,
transitions: self
.transitions
.into_iter()
.map(|(k, v)| {
(
k,
v.into_iter()
.map(|(k, v)| TransitionTo {
function: v.function,
state: k.state,
stack: k.stack,
transition: v.transition,
})
.collect(),
)
})
.collect(),
})
}
@ -554,17 +581,25 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
symbol: Symbol(stack_symbol.0),
})
.or_default();
if !entry.is_empty() && !self.options.non_deterministic {
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1);
if let Some(entry) = entry.iter().next()
&& !self.options.non_deterministic
{
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
.emit_info("previously defined here", entry.1.transition);
}
if !entry.insert(TransitionTo {
if let Some(previous) = entry.insert(
Transition {
state: State(next_state.0),
stack,
},
TransitionInfo {
function,
transition: item.1,
}) {
self.ctx.emit_warning("duplicate transition", item.1);
},
) {
self.ctx
.emit_warning("duplicate transition", item.1)
.emit_info("previously defined here", previous.transition);
}
}
}

View file

@ -1,13 +1,12 @@
use std::collections::HashSet;
use super::*;
use crate::{
delta_lower, dual_struct_serde, gamma_upper, loader::{
delta_lower, dual_struct_serde, gamma_upper,
loader::{
BLANK_SYMBOL, Context, INITIAL_STATE, Spanned,
ast::{self, Symbol as Sym},
log::LogSink,
}
},
};
dual_struct_serde! {
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
@ -62,10 +61,22 @@ dual_struct_serde! {{#[serde_with::serde_as]}
#[serde(borrow)]
#[serde_as(as = "serde_with::Seq<(_, _)>")]
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
}
}
/// Outcome half of a `Tm` transition.
///
/// Used as the key of the inner map in `TmCompiler::transitions`, so two
/// definitions from the same source with the same state, symbol and
/// direction collide there.
#[derive(Hash, Clone, Copy, PartialEq, Eq)]
struct Transition<'a> {
/// State the transition moves to.
pub state: State<'a>,
/// Symbol taken from the transition's `to_tape` part.
pub symbol: Symbol<'a>,
/// Direction taken from the transition definition.
pub direction: Direction,
}
/// Source locations recorded for one transition.
///
/// Stored as the value next to its `Transition` key and copied into
/// `TransitionTo` when the final `Tm` is built.
struct TransitionInfo {
/// Span reported as "previously defined here" in duplicate diagnostics.
pub transition: Span,
// NOTE(review): presumably the span of the enclosing transition-function item — confirm.
pub function: Span,
}
impl<'a> Tm<'a> {
pub fn compile(
items: impl Iterator<Item = Spanned<ast::TopLevel<'a>>>,
@ -92,7 +103,7 @@ pub struct TmCompiler<'a, 'b> {
final_states: HashMap<State<'a>, StateInfo>,
final_states_def: Option<Span>,
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
}
impl<'a, 'b> TmCompiler<'a, 'b> {
@ -178,7 +189,24 @@ impl<'a, 'b> TmCompiler<'a, 'b> {
states: self.states,
symbols: self.symbols,
final_states: self.final_states,
transitions: self.transitions,
transitions: self
.transitions
.into_iter()
.map(|(k, v)| {
(
k,
v.into_iter()
.map(|(k, v)| TransitionTo {
direction: k.direction,
function: v.function,
state: k.state,
symbol: k.symbol,
transition: v.transition,
})
.collect(),
)
})
.collect(),
})
}
@ -393,18 +421,26 @@ impl<'a, 'b> TmCompiler<'a, 'b> {
symbol: Symbol(from_tape.0),
})
.or_default();
if !entry.is_empty() && !self.options.non_deterministic {
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1);
if let Some(entry) = entry.iter().next()
&& !self.options.non_deterministic
{
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
.emit_info("previously defined here", entry.1.transition);
}
if !entry.insert(TransitionTo {
if let Some(previous) = entry.insert(
Transition {
state: State(to_state.0),
symbol: Symbol(to_tape.0),
direction: direction.0,
},
TransitionInfo {
function,
transition: item.1,
}) {
self.ctx.emit_warning("duplicate transition", item.1);
},
) {
self.ctx
.emit_warning("duplicate transition", item.1)
.emit_info("previously defined here", previous.transition);
}
}
}

View file

@ -1,11 +1,10 @@
use crate::loader::{Span, Spanned};
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default)]
pub enum StringKind {
#[default]
Regular,
Regex
Regex,
}
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
@ -64,7 +63,11 @@ impl<'a> std::fmt::Display for Token<'a> {
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
Token::Ident(_) => write!(f, "ident"),
Token::String(string, kind, _) if f.alternate() => write!(f, "{}{string:?}", if *kind==StringKind::Regex {"r"} else {""}),
Token::String(string, kind, _) if f.alternate() => write!(
f,
"{}{string:?}",
if *kind == StringKind::Regex { "r" } else { "" }
),
Token::String(_, _, _) => write!(f, "string"),
Token::LineEnd => write!(f, "eol"),
@ -169,12 +172,18 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
let mut escaped = false;
loop {
match self.consume() {
Some('"') => break Ok(Token::String(&self.input[start+1..self.position], StringKind::Regular, escaped)),
Some('"') => {
break Ok(Token::String(
&self.input[start + 1..self.position],
StringKind::Regular,
escaped,
));
}
None => break Err(Error::UnclosedString),
Some('\\') => {
_ = self.consume();
escaped = true;
},
}
_ => {}
}
}

View file

@ -1,5 +1,8 @@
use std::fmt::Display;
use unicode_display_width::width;
use unicode_segmentation::UnicodeSegmentation;
use crate::loader::Span;
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -226,13 +229,17 @@ impl<'a> Display for LogEntryDisplay<'a> {
for _ in 0..padding + 3 {
write!(f, " ")?;
}
for char in line.chars() {
for grapheme in line.graphemes(true) {
if (span.0..span.1).contains(&index) {
for _ in 0..width(grapheme){
write!(f, "~")?;
}
} else {
for _ in 0..width(grapheme){
write!(f, " ")?;
}
index += char.len_utf8();
}
index += grapheme.len();
}
write!(f, "{RESET}")?;
index += '\n'.len_utf8();

View file

@ -145,8 +145,15 @@ impl<'a, 'b> Parser<'a, 'b> {
let (r, k, e, s) = match tok {
S(T::String(r, k, e), s) => (r, k, e, s),
S(t, s) => {
self.ctx.emit_error(format!("unexpected {:#} expected {:}", t, T::String("", Default::default(), false)), s);
return S("<INVALID>".into(), s)
self.ctx.emit_error(
format!(
"unexpected {:#} expected {:}",
t,
T::String("", Default::default(), false)
),
s,
);
return S("<INVALID>".into(), s);
}
};
@ -266,7 +273,6 @@ impl<'a, 'b> Parser<'a, 'b> {
S(ProductionUnit::Ident("<INVALID>"), span)
}
}
}
fn parse_production_unit(&mut self) -> S<ProductionUnit<'a>> {
@ -274,7 +280,10 @@ impl<'a, 'b> Parser<'a, 'b> {
self.parse_as_production_unit(tok)
}
fn parse_production_rule(&mut self, S(sym, start): S<ProductionUnit<'a>>) -> Option<S<TopLevel<'a>>> {
fn parse_production_rule(
&mut self,
S(sym, start): S<ProductionUnit<'a>>,
) -> Option<S<TopLevel<'a>>> {
let mut lhs_group = ProductionGroup(vec![S(sym, start)]);
let mut lhs_group_end = start;
while !matches!(self.peek_token().0, T::LSmallArrow | T::LineEnd) {

View file

@ -1,12 +1,9 @@
use std::collections::HashMap;
use automata::{
delta_lower, epsilon, gamma_upper,
loader::{self, Context, Span, Spanned, lexer::Lexer},
sigma_upper,
automatan::{fa::Fa, pda::Pda, tm::Tm}, delta_lower, epsilon, gamma_upper, loader::{self, Context, Machine, Span, Spanned, lexer::Lexer}, sigma_upper
};
use serde::Serialize;
use wasm_bindgen::prelude::wasm_bindgen;
#[wasm_bindgen]
@ -144,14 +141,6 @@ pub struct CompileLog {
pub end: Option<usize>,
}
#[derive(Serialize, Debug)]
pub struct Graph<'a> {
initial: &'a str,
final_states: Vec<&'a str>,
states: Vec<&'a str>,
transitions: HashMap<String, String>,
}
#[wasm_bindgen(getter_with_clone)]
pub struct CompileResult {
pub log: Vec<CompileLog>,
@ -159,12 +148,67 @@ pub struct CompileResult {
pub machine: Option<String>,
}
/// Rewrites every `Span` stored inside a value by passing it through a callback.
///
/// `compile` uses this to replace each span's offsets with the summed UTF-16
/// length of the input prefix before serializing the machine.
trait FixupSpan{
/// Replaces each span held by `self` with `func(span)`.
fn fixup(&mut self, func: impl FnMut(Span) -> Span);
}
/// Forwards span rewriting to whichever concrete automaton the `Machine` wraps.
impl<'a> FixupSpan for Machine<'a> {
    fn fixup(&mut self, func: impl FnMut(Span) -> Span) {
        match self {
            Self::Fa(automaton) => automaton.fixup(func),
            Self::Pda(automaton) => automaton.fixup(func),
            Self::Tm(automaton) => automaton.fixup(func),
        }
    }
}
/// Rewrites the definition spans of the alphabet, states and final states,
/// then the `transition` and `function` spans of every transition target.
impl<'a> FixupSpan for Fa<'a> {
    fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
        for letter in self.alphabet.values_mut() {
            letter.definition = func(letter.definition);
        }
        for state in self.states.values_mut() {
            state.definition = func(state.definition);
        }
        for accepting in self.final_states.values_mut() {
            accepting.definition = func(accepting.definition);
        }
        for targets in self.transitions.values_mut() {
            for target in targets.iter_mut() {
                // Same call order as before: `transition` first, then `function`.
                target.transition = func(target.transition);
                target.function = func(target.function);
            }
        }
    }
}
/// Rewrites the definition spans of the alphabet, states, stack symbols and
/// (when present) final states, then the `transition` and `function` spans of
/// every transition target.
impl<'a> FixupSpan for Pda<'a> {
    fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
        for letter in self.alphabet.values_mut() {
            letter.definition = func(letter.definition);
        }
        for state in self.states.values_mut() {
            state.definition = func(state.definition);
        }
        for symbol in self.symbols.values_mut() {
            symbol.definition = func(symbol.definition);
        }
        // `final_states` is optional; skip it when absent instead of building
        // a throwaway empty map just to have something to iterate.
        if let Some(finals) = self.final_states.as_mut() {
            for accepting in finals.values_mut() {
                accepting.definition = func(accepting.definition);
            }
        }
        for targets in self.transitions.values_mut() {
            for target in targets.iter_mut() {
                target.transition = func(target.transition);
                target.function = func(target.function);
            }
        }
    }
}
/// Rewrites the definition spans of the states, symbols and final states,
/// then the `transition` and `function` spans of every transition target.
impl<'a> FixupSpan for Tm<'a> {
    fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
        for state in self.states.values_mut() {
            state.definition = func(state.definition);
        }
        for symbol in self.symbols.values_mut() {
            symbol.definition = func(symbol.definition);
        }
        for accepting in self.final_states.values_mut() {
            accepting.definition = func(accepting.definition);
        }
        for targets in self.transitions.values_mut() {
            for target in targets.iter_mut() {
                // Same call order as before: `transition` first, then `function`.
                target.transition = func(target.transition);
                target.function = func(target.function);
            }
        }
    }
}
#[wasm_bindgen]
pub fn compile(input: &str) -> CompileResult {
let mut ctx = Context::new(input);
let result = automata::loader::parse_universal(&mut ctx);
let machine = result.map(|result| serde_json::to_string(&result).unwrap());
let machine = result.map(|mut result| {
result.fixup(|span|Span(input[..span.0].chars().map(char::len_utf16).sum(), input[..span.1].chars().map(char::len_utf16).sum()));
serde_json::to_string(&result).unwrap()
});
use std::fmt::Write;
let ansi_log = ctx.logs_display().fold(String::new(), |mut s, e| {
@ -185,10 +229,10 @@ pub fn compile(input: &str) -> CompileResult {
message: e.message,
start: e
.span
.map(|span| input[..span.0].chars().map(char::len_utf16).count()),
.map(|span| input[..span.0].chars().map(char::len_utf16).sum()),
end: e
.span
.map(|span| input[..span.1].chars().map(char::len_utf16).count()),
.map(|span| input[..span.1].chars().map(char::len_utf16).sum()),
})
.collect();