diff --git a/Cargo.lock b/Cargo.lock index 6e21067..791b965 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,8 @@ version = "0.1.0" dependencies = [ "serde", "serde_with", + "unicode-display-width", + "unicode-segmentation", ] [[package]] @@ -494,12 +496,27 @@ dependencies = [ "time-core", ] +[[package]] +name = "unicode-display-width" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a43273b656140aa2bb8e65351fe87c255f0eca706b2538a9bd4a590a3490bf3" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "wasm-bindgen" version = "0.2.106" diff --git a/automata/Cargo.toml b/automata/Cargo.toml index ad078e8..8479da3 100644 --- a/automata/Cargo.toml +++ b/automata/Cargo.toml @@ -6,6 +6,8 @@ edition = "2024" [dependencies] serde = { version = "1.0", features = ["derive"], optional = true} serde_with = { version = "3.0", features = ["default"], optional = true} +unicode-display-width = "*" +unicode-segmentation = "*" [features] default = [] diff --git a/automata/src/automatan/fa.rs b/automata/src/automatan/fa.rs index 2dbefb0..20e5117 100644 --- a/automata/src/automatan/fa.rs +++ b/automata/src/automatan/fa.rs @@ -1,5 +1,3 @@ -use std::collections::HashSet; - use super::*; use crate::{ @@ -32,6 +30,16 @@ dual_struct_serde! { } } +#[derive(Hash, Clone, Copy, PartialEq, Eq)] +struct Transition<'a> { + pub state: State<'a>, +} + +struct TransitionInfo { + pub transition: Span, + pub function: Span, +} + dual_struct_serde! { {#[serde_with::serde_as]} #[derive(Clone, Debug)] pub struct Fa<'a> { @@ -49,7 +57,7 @@ dual_struct_serde! { {#[serde_with::serde_as]} #[serde(borrow)] #[serde_as(as = "serde_with::Seq<(_, _)>")] - pub transitions: HashMap, HashSet>>, + pub transitions: HashMap, Vec>>, } } @@ -78,7 +86,7 @@ pub struct FaCompiler<'a, 'b> { final_states: HashMap, StateInfo>, final_states_def: Option, - transitions: HashMap, HashSet>>, + transitions: HashMap, HashMap, TransitionInfo>>, } impl<'a, 'b> FaCompiler<'a, 'b> { @@ -160,7 +168,22 @@ impl<'a, 'b> FaCompiler<'a, 'b> { states: self.states, alphabet: self.alphabet, final_states: self.final_states, - transitions: self.transitions, + transitions: self + .transitions + .into_iter() + .map(|(k, v)| { + ( + k, + v.into_iter() + .map(|(k, v)| TransitionTo { + function: v.function, + state: k.state, + transition: v.transition, + }) + .collect(), + ) + }) + .collect(), }) } @@ -365,13 +388,17 @@ impl<'a, 'b> FaCompiler<'a, 'b> { && !self.options.non_deterministic { self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1) - .emit_info("previously defined here", entry.transition); + .emit_info("previously defined here", entry.1.transition); } - if let Some(previous) = entry.replace(TransitionTo { - state: State(next_state.0), - function, - transition: item.1, - }) { + if let Some(previous) = entry.insert( + Transition { + state: State(next_state.0), + }, + TransitionInfo { + function, + transition: item.1, + }, + ) { self.ctx .emit_warning("duplicate transition", item.1) .emit_info("previously defined here", previous.transition); diff --git a/automata/src/automatan/pda.rs b/automata/src/automatan/pda.rs index 5c2ec2a..528fd21 100644 --- a/automata/src/automatan/pda.rs +++ b/automata/src/automatan/pda.rs @@ -1,5 +1,3 @@ -use std::collections::HashSet; - use super::*; use crate::{ @@ -37,6 +35,17 @@ dual_struct_serde! { } } +#[derive(Hash, Clone, PartialEq, Eq)] +struct Transition<'a> { + pub state: State<'a>, + pub stack: Vec>, +} + +struct TransitionInfo { + pub transition: Span, + pub function: Span, +} + dual_struct_serde! { {#[serde_with::serde_as]} #[derive(Clone, Debug)] pub struct Pda<'a> { @@ -56,7 +65,7 @@ dual_struct_serde! { {#[serde_with::serde_as]} #[serde(borrow)] #[serde_as(as = "serde_with::Seq<(_, _)>")] - pub transitions: HashMap, HashSet>>, + pub transitions: HashMap, Vec>>, } } @@ -86,7 +95,7 @@ pub struct PdaCompiler<'a, 'b> { final_states: HashMap, StateInfo>, final_states_def: Option, - transitions: HashMap, HashSet>>, + transitions: HashMap, HashMap, TransitionInfo>>, } impl<'a> Pda<'a> { @@ -180,12 +189,14 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { self.ctx .emit_error_locless("final states never defined") .emit_help_logless("add: F = {...}"); - }else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) = (self.accept_by, self.final_states_def){ + } else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) = + (self.accept_by, self.final_states_def) + { self.ctx .emit_error_locless("final states defined alongside accept by empty stack") .emit_help("either remote to accept by empty stack", states) .emit_help("or remote to accept by final state", empty); - } + } let initial_state = match self.initial_state { Some(some) => some.0, @@ -245,7 +256,23 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { symbols: self.symbols, alphabet: self.alphabet, final_states, - transitions: self.transitions, + transitions: self + .transitions + .into_iter() + .map(|(k, v)| { + ( + k, + v.into_iter() + .map(|(k, v)| TransitionTo { + function: v.function, + state: k.state, + stack: k.stack, + transition: v.transition, + }) + .collect(), + ) + }) + .collect(), }) } @@ -554,17 +581,25 @@ impl<'a, 'b> PdaCompiler<'a, 'b> { symbol: Symbol(stack_symbol.0), }) .or_default(); - if !entry.is_empty() && !self.options.non_deterministic { - self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); + if let Some(entry) = entry.iter().next() + && !self.options.non_deterministic + { + self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1) + .emit_info("previously defined here", entry.1.transition); } - if !entry.insert(TransitionTo { - state: State(next_state.0), - stack, - - function, - transition: item.1, - }) { - self.ctx.emit_warning("duplicate transition", item.1); + if let Some(previous) = entry.insert( + Transition { + state: State(next_state.0), + stack, + }, + TransitionInfo { + function, + transition: item.1, + }, + ) { + self.ctx + .emit_warning("duplicate transition", item.1) + .emit_info("previously defined here", previous.transition); } } } diff --git a/automata/src/automatan/tm.rs b/automata/src/automatan/tm.rs index a8838dc..fe04104 100644 --- a/automata/src/automatan/tm.rs +++ b/automata/src/automatan/tm.rs @@ -1,13 +1,12 @@ -use std::collections::HashSet; - use super::*; use crate::{ - delta_lower, dual_struct_serde, gamma_upper, loader::{ + delta_lower, dual_struct_serde, gamma_upper, + loader::{ BLANK_SYMBOL, Context, INITIAL_STATE, Spanned, ast::{self, Symbol as Sym}, log::LogSink, - } + }, }; dual_struct_serde! { #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] @@ -62,10 +61,22 @@ dual_struct_serde! {{#[serde_with::serde_as]} #[serde(borrow)] #[serde_as(as = "serde_with::Seq<(_, _)>")] - pub transitions: HashMap, HashSet>>, + pub transitions: HashMap, Vec>>, } } +#[derive(Hash, Clone, Copy, PartialEq, Eq)] +struct Transition<'a> { + pub state: State<'a>, + pub symbol: Symbol<'a>, + pub direction: Direction, +} + +struct TransitionInfo { + pub transition: Span, + pub function: Span, +} + impl<'a> Tm<'a> { pub fn compile( items: impl Iterator>>, @@ -92,7 +103,7 @@ pub struct TmCompiler<'a, 'b> { final_states: HashMap, StateInfo>, final_states_def: Option, - transitions: HashMap, HashSet>>, + transitions: HashMap, HashMap, TransitionInfo>>, } impl<'a, 'b> TmCompiler<'a, 'b> { @@ -178,7 +189,24 @@ impl<'a, 'b> TmCompiler<'a, 'b> { states: self.states, symbols: self.symbols, final_states: self.final_states, - transitions: self.transitions, + transitions: self + .transitions + .into_iter() + .map(|(k, v)| { + ( + k, + v.into_iter() + .map(|(k, v)| TransitionTo { + direction: k.direction, + function: v.function, + state: k.state, + symbol: k.symbol, + transition: v.transition, + }) + .collect(), + ) + }) + .collect(), }) } @@ -393,18 +421,26 @@ impl<'a, 'b> TmCompiler<'a, 'b> { symbol: Symbol(from_tape.0), }) .or_default(); - if !entry.is_empty() && !self.options.non_deterministic { - self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); + if let Some(entry) = entry.iter().next() + && !self.options.non_deterministic + { + self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1) + .emit_info("previously defined here", entry.1.transition); } - if !entry.insert(TransitionTo { - state: State(to_state.0), - symbol: Symbol(to_tape.0), - direction: direction.0, - - function, - transition: item.1, - }) { - self.ctx.emit_warning("duplicate transition", item.1); + if let Some(previous) = entry.insert( + Transition { + state: State(to_state.0), + symbol: Symbol(to_tape.0), + direction: direction.0, + }, + TransitionInfo { + function, + transition: item.1, + }, + ) { + self.ctx + .emit_warning("duplicate transition", item.1) + .emit_info("previously defined here", previous.transition); } } } diff --git a/automata/src/loader/lexer.rs b/automata/src/loader/lexer.rs index 956b525..9cd51c4 100644 --- a/automata/src/loader/lexer.rs +++ b/automata/src/loader/lexer.rs @@ -1,11 +1,10 @@ use crate::loader::{Span, Spanned}; - #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default)] -pub enum StringKind{ +pub enum StringKind { #[default] Regular, - Regex + Regex, } #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] @@ -60,13 +59,17 @@ impl<'a> std::fmt::Display for Token<'a> { Token::LSmallArrow => write!(f, "'->'"), Token::LBigArrow => write!(f, "'=>'"), Token::Comment(_) => write!(f, ""), - + Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"), Token::Ident(_) => write!(f, "ident"), - - Token::String(string, kind, _) if f.alternate() => write!(f, "{}{string:?}", if *kind==StringKind::Regex {"r"} else {""}), + + Token::String(string, kind, _) if f.alternate() => write!( + f, + "{}{string:?}", + if *kind == StringKind::Regex { "r" } else { "" } + ), Token::String(_, _, _) => write!(f, "string"), - + Token::LineEnd => write!(f, "eol"), } } @@ -169,12 +172,18 @@ impl<'a> std::iter::Iterator for Lexer<'a> { let mut escaped = false; loop { match self.consume() { - Some('"') => break Ok(Token::String(&self.input[start+1..self.position], StringKind::Regular, escaped)), + Some('"') => { + break Ok(Token::String( + &self.input[start + 1..self.position], + StringKind::Regular, + escaped, + )); + } None => break Err(Error::UnclosedString), Some('\\') => { _ = self.consume(); escaped = true; - }, + } _ => {} } } diff --git a/automata/src/loader/log.rs b/automata/src/loader/log.rs index 4766072..742a7fa 100644 --- a/automata/src/loader/log.rs +++ b/automata/src/loader/log.rs @@ -1,5 +1,8 @@ use std::fmt::Display; +use unicode_display_width::width; +use unicode_segmentation::UnicodeSegmentation; + use crate::loader::Span; #[cfg_attr(feature = "serde", derive(serde::Serialize))] @@ -226,13 +229,17 @@ impl<'a> Display for LogEntryDisplay<'a> { for _ in 0..padding + 3 { write!(f, " ")?; } - for char in line.chars() { + for grapheme in line.graphemes(true) { if (span.0..span.1).contains(&index) { - write!(f, "~")?; + for _ in 0..width(grapheme){ + write!(f, "~")?; + } } else { - write!(f, " ")?; + for _ in 0..width(grapheme){ + write!(f, " ")?; + } } - index += char.len_utf8(); + index += grapheme.len(); } write!(f, "{RESET}")?; index += '\n'.len_utf8(); diff --git a/automata/src/loader/parser.rs b/automata/src/loader/parser.rs index c28df60..c546777 100644 --- a/automata/src/loader/parser.rs +++ b/automata/src/loader/parser.rs @@ -141,19 +141,26 @@ impl<'a, 'b> Parser<'a, 'b> { S(Tuple(items), start.join(end)) } - fn parse_as_string(&mut self, tok: S>) -> S>{ + fn parse_as_string(&mut self, tok: S>) -> S> { let (r, k, e, s) = match tok { S(T::String(r, k, e), s) => (r, k, e, s), S(t, s) => { - self.ctx.emit_error(format!("unexpected {:#} expected {:}", t, T::String("", Default::default(), false)), s); - return S("".into(), s) + self.ctx.emit_error( + format!( + "unexpected {:#} expected {:}", + t, + T::String("", Default::default(), false) + ), + s, + ); + return S("".into(), s); } }; S(r.into(), s) } - - fn parse_string(&mut self) -> S>{ + + fn parse_string(&mut self) -> S> { let tok = self.next_token(); self.parse_as_string(tok) } @@ -246,7 +253,7 @@ impl<'a, 'b> Parser<'a, 'b> { todo!() } - fn parse_as_production_unit(&mut self, tok: S>) -> S>{ + fn parse_as_production_unit(&mut self, tok: S>) -> S> { match tok { S(T::Tilde, r) => S(ProductionUnit::Epsilon("~"), r), S(T::Ident(repr @ epsilon!(pat)), r) => S(ProductionUnit::Epsilon(repr), r), @@ -266,15 +273,17 @@ impl<'a, 'b> Parser<'a, 'b> { S(ProductionUnit::Ident(""), span) } } - } - - fn parse_production_unit(&mut self) -> S>{ + + fn parse_production_unit(&mut self) -> S> { let tok = self.next_token(); self.parse_as_production_unit(tok) } - fn parse_production_rule(&mut self, S(sym, start): S>) -> Option>> { + fn parse_production_rule( + &mut self, + S(sym, start): S>, + ) -> Option>> { let mut lhs_group = ProductionGroup(vec![S(sym, start)]); let mut lhs_group_end = start; while !matches!(self.peek_token().0, T::LSmallArrow | T::LineEnd) { diff --git a/web_lib/src/lib.rs b/web_lib/src/lib.rs index e861b20..274cc96 100644 --- a/web_lib/src/lib.rs +++ b/web_lib/src/lib.rs @@ -1,12 +1,9 @@ use std::collections::HashMap; use automata::{ - delta_lower, epsilon, gamma_upper, - loader::{self, Context, Span, Spanned, lexer::Lexer}, - sigma_upper, + automatan::{fa::Fa, pda::Pda, tm::Tm}, delta_lower, epsilon, gamma_upper, loader::{self, Context, Machine, Span, Spanned, lexer::Lexer}, sigma_upper }; -use serde::Serialize; use wasm_bindgen::prelude::wasm_bindgen; #[wasm_bindgen] @@ -144,14 +141,6 @@ pub struct CompileLog { pub end: Option, } -#[derive(Serialize, Debug)] -pub struct Graph<'a> { - initial: &'a str, - final_states: Vec<&'a str>, - states: Vec<&'a str>, - transitions: HashMap, -} - #[wasm_bindgen(getter_with_clone)] pub struct CompileResult { pub log: Vec, @@ -159,12 +148,67 @@ pub struct CompileResult { pub machine: Option, } +trait FixupSpan{ + fn fixup(&mut self, func: impl FnMut(Span) -> Span); +} + +impl<'a> FixupSpan for Machine<'a>{ + fn fixup(&mut self, func: impl FnMut(Span) -> Span) { + match self{ + Machine::Fa(fa) => fa.fixup(func), + Machine::Pda(pda) => pda.fixup(func), + Machine::Tm(tm) => tm.fixup(func), + } + } +} +impl<'a> FixupSpan for Fa<'a>{ + fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) { + self.alphabet.values_mut().for_each(|v| v.definition = func(v.definition)); + self.states.values_mut().for_each(|v| v.definition = func(v.definition)); + self.final_states.values_mut().for_each(|v| v.definition = func(v.definition)); + self.transitions.values_mut().flat_map(|v|v.iter_mut()).for_each(|e|{ + e.transition = func(e.transition); + e.function = func(e.function); + }); + } +} + +impl<'a> FixupSpan for Pda<'a>{ + fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) { + self.alphabet.values_mut().for_each(|v| v.definition = func(v.definition)); + self.states.values_mut().for_each(|v| v.definition = func(v.definition)); + self.symbols.values_mut().for_each(|v| v.definition = func(v.definition)); + self.final_states.as_mut().unwrap_or(&mut HashMap::new()).values_mut().for_each(|v| v.definition = func(v.definition)); + self.transitions.values_mut().flat_map(|v|v.iter_mut()).for_each(|e|{ + e.transition = func(e.transition); + e.function = func(e.function); + }); + } +} + +impl<'a> FixupSpan for Tm<'a>{ + fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) { + self.states.values_mut().for_each(|v| v.definition = func(v.definition)); + self.symbols.values_mut().for_each(|v| v.definition = func(v.definition)); + self.final_states.values_mut().for_each(|v| v.definition = func(v.definition)); + self.transitions.values_mut().flat_map(|v|v.iter_mut()).for_each(|e|{ + e.transition = func(e.transition); + e.function = func(e.function); + }); + } +} + + + #[wasm_bindgen] pub fn compile(input: &str) -> CompileResult { let mut ctx = Context::new(input); let result = automata::loader::parse_universal(&mut ctx); - let machine = result.map(|result| serde_json::to_string(&result).unwrap()); + let machine = result.map(|mut result| { + result.fixup(|span|Span(input[..span.0].chars().map(char::len_utf16).sum(), input[..span.1].chars().map(char::len_utf16).sum())); + serde_json::to_string(&result).unwrap() + }); use std::fmt::Write; let ansi_log = ctx.logs_display().fold(String::new(), |mut s, e| { @@ -185,10 +229,10 @@ pub fn compile(input: &str) -> CompileResult { message: e.message, start: e .span - .map(|span| input[..span.0].chars().map(char::len_utf16).count()), + .map(|span| input[..span.0].chars().map(char::len_utf16).sum()), end: e .span - .map(|span| input[..span.1].chars().map(char::len_utf16).count()), + .map(|span| input[..span.1].chars().map(char::len_utf16).sum()), }) .collect();