utf16 index correction

This commit is contained in:
ParkerTenBroeck 2026-01-14 14:51:57 -05:00
parent bffa67069d
commit d9e291e0ff
6 changed files with 240 additions and 80 deletions

View file

@ -1,5 +1,3 @@
use std::collections::HashSet;
use super::*; use super::*;
use crate::{ use crate::{
@ -32,6 +30,16 @@ dual_struct_serde! {
} }
} }
/// Identity of a finite-automaton transition target, used as the key of the
/// per-source map held in `FaCompiler::transitions`.
///
/// Source spans are kept in `TransitionInfo` rather than here, so hashing and
/// equality depend only on the destination state. Inserting a second
/// transition with an equal key is what the compiler reports as a
/// "duplicate transition".
#[derive(Hash, Clone, Copy, PartialEq, Eq)]
struct Transition<'a> {
/// State the transition leads to (built from the parsed `next_state`).
pub state: State<'a>,
}
/// Source-location data stored as the map value next to a `Transition` key.
///
/// Both spans are copied into the public `TransitionTo` when the compiler
/// produces the final `Fa`.
struct TransitionInfo {
/// Span of the transition item; shown as "previously defined here" when a
/// later transition conflicts with or duplicates this one.
pub transition: Span,
/// Span forwarded to `TransitionTo::function`.
/// NOTE(review): presumably the span of the enclosing transition-function
/// definition — confirm against the code that computes `function`.
pub function: Span,
}
dual_struct_serde! { {#[serde_with::serde_as]} dual_struct_serde! { {#[serde_with::serde_as]}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Fa<'a> { pub struct Fa<'a> {
@ -49,7 +57,7 @@ dual_struct_serde! { {#[serde_with::serde_as]}
#[serde(borrow)] #[serde(borrow)]
#[serde_as(as = "serde_with::Seq<(_, _)>")] #[serde_as(as = "serde_with::Seq<(_, _)>")]
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>, pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
} }
} }
@ -78,7 +86,7 @@ pub struct FaCompiler<'a, 'b> {
final_states: HashMap<State<'a>, StateInfo>, final_states: HashMap<State<'a>, StateInfo>,
final_states_def: Option<Span>, final_states_def: Option<Span>,
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>, transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
} }
impl<'a, 'b> FaCompiler<'a, 'b> { impl<'a, 'b> FaCompiler<'a, 'b> {
@ -160,7 +168,22 @@ impl<'a, 'b> FaCompiler<'a, 'b> {
states: self.states, states: self.states,
alphabet: self.alphabet, alphabet: self.alphabet,
final_states: self.final_states, final_states: self.final_states,
transitions: self.transitions, transitions: self
.transitions
.into_iter()
.map(|(k, v)| {
(
k,
v.into_iter()
.map(|(k, v)| TransitionTo {
function: v.function,
state: k.state,
transition: v.transition,
})
.collect(),
)
})
.collect(),
}) })
} }
@ -365,13 +388,17 @@ impl<'a, 'b> FaCompiler<'a, 'b> {
&& !self.options.non_deterministic && !self.options.non_deterministic
{ {
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1) self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
.emit_info("previously defined here", entry.transition); .emit_info("previously defined here", entry.1.transition);
} }
if let Some(previous) = entry.replace(TransitionTo { if let Some(previous) = entry.insert(
state: State(next_state.0), Transition {
function, state: State(next_state.0),
transition: item.1, },
}) { TransitionInfo {
function,
transition: item.1,
},
) {
self.ctx self.ctx
.emit_warning("duplicate transition", item.1) .emit_warning("duplicate transition", item.1)
.emit_info("previously defined here", previous.transition); .emit_info("previously defined here", previous.transition);

View file

@ -1,5 +1,3 @@
use std::collections::HashSet;
use super::*; use super::*;
use crate::{ use crate::{
@ -37,6 +35,17 @@ dual_struct_serde! {
} }
} }
/// Identity of a pushdown-automaton transition target, used as the key of the
/// per-source map held in `PdaCompiler::transitions`.
///
/// Source spans are kept in `TransitionInfo` rather than here, so hashing and
/// equality depend only on the destination state and the stack symbols.
/// Inserting a second transition with an equal key is what the compiler
/// reports as a "duplicate transition". The type is `Clone` but not `Copy`
/// because it owns a `Vec`.
#[derive(Hash, Clone, PartialEq, Eq)]
struct Transition<'a> {
/// State the transition leads to (built from the parsed `next_state`).
pub state: State<'a>,
/// Stack symbols attached to the transition; copied into
/// `TransitionTo::stack` in the compiled `Pda`.
/// NOTE(review): presumably the symbols pushed when the transition is
/// taken — confirm, including their order.
pub stack: Vec<Symbol<'a>>,
}
/// Source-location data stored as the map value next to a `Transition` key.
///
/// Both spans are copied into the public `TransitionTo` when the compiler
/// produces the final `Pda`.
struct TransitionInfo {
/// Span of the transition item; shown as "previously defined here" when a
/// later transition conflicts with or duplicates this one.
pub transition: Span,
/// Span forwarded to `TransitionTo::function`.
/// NOTE(review): presumably the span of the enclosing transition-function
/// definition — confirm against the code that computes `function`.
pub function: Span,
}
dual_struct_serde! { {#[serde_with::serde_as]} dual_struct_serde! { {#[serde_with::serde_as]}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Pda<'a> { pub struct Pda<'a> {
@ -56,7 +65,7 @@ dual_struct_serde! { {#[serde_with::serde_as]}
#[serde(borrow)] #[serde(borrow)]
#[serde_as(as = "serde_with::Seq<(_, _)>")] #[serde_as(as = "serde_with::Seq<(_, _)>")]
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>, pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
} }
} }
@ -86,7 +95,7 @@ pub struct PdaCompiler<'a, 'b> {
final_states: HashMap<State<'a>, StateInfo>, final_states: HashMap<State<'a>, StateInfo>,
final_states_def: Option<Span>, final_states_def: Option<Span>,
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>, transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
} }
impl<'a> Pda<'a> { impl<'a> Pda<'a> {
@ -180,12 +189,14 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
self.ctx self.ctx
.emit_error_locless("final states never defined") .emit_error_locless("final states never defined")
.emit_help_logless("add: F = {...}"); .emit_help_logless("add: F = {...}");
}else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) = (self.accept_by, self.final_states_def){ } else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) =
(self.accept_by, self.final_states_def)
{
self.ctx self.ctx
.emit_error_locless("final states defined alongside accept by empty stack") .emit_error_locless("final states defined alongside accept by empty stack")
.emit_help("either remote to accept by empty stack", states) .emit_help("either remote to accept by empty stack", states)
.emit_help("or remote to accept by final state", empty); .emit_help("or remote to accept by final state", empty);
} }
let initial_state = match self.initial_state { let initial_state = match self.initial_state {
Some(some) => some.0, Some(some) => some.0,
@ -245,7 +256,23 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
symbols: self.symbols, symbols: self.symbols,
alphabet: self.alphabet, alphabet: self.alphabet,
final_states, final_states,
transitions: self.transitions, transitions: self
.transitions
.into_iter()
.map(|(k, v)| {
(
k,
v.into_iter()
.map(|(k, v)| TransitionTo {
function: v.function,
state: k.state,
stack: k.stack,
transition: v.transition,
})
.collect(),
)
})
.collect(),
}) })
} }
@ -554,17 +581,25 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
symbol: Symbol(stack_symbol.0), symbol: Symbol(stack_symbol.0),
}) })
.or_default(); .or_default();
if !entry.is_empty() && !self.options.non_deterministic { if let Some(entry) = entry.iter().next()
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); && !self.options.non_deterministic
{
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
.emit_info("previously defined here", entry.1.transition);
} }
if !entry.insert(TransitionTo { if let Some(previous) = entry.insert(
state: State(next_state.0), Transition {
stack, state: State(next_state.0),
stack,
function, },
transition: item.1, TransitionInfo {
}) { function,
self.ctx.emit_warning("duplicate transition", item.1); transition: item.1,
},
) {
self.ctx
.emit_warning("duplicate transition", item.1)
.emit_info("previously defined here", previous.transition);
} }
} }
} }

View file

@ -1,13 +1,12 @@
use std::collections::HashSet;
use super::*; use super::*;
use crate::{ use crate::{
delta_lower, dual_struct_serde, gamma_upper, loader::{ delta_lower, dual_struct_serde, gamma_upper,
loader::{
BLANK_SYMBOL, Context, INITIAL_STATE, Spanned, BLANK_SYMBOL, Context, INITIAL_STATE, Spanned,
ast::{self, Symbol as Sym}, ast::{self, Symbol as Sym},
log::LogSink, log::LogSink,
} },
}; };
dual_struct_serde! { dual_struct_serde! {
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
@ -62,10 +61,22 @@ dual_struct_serde! {{#[serde_with::serde_as]}
#[serde(borrow)] #[serde(borrow)]
#[serde_as(as = "serde_with::Seq<(_, _)>")] #[serde_as(as = "serde_with::Seq<(_, _)>")]
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>, pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
} }
} }
/// Identity of a Turing-machine transition target, used as the key of the
/// per-source map held in `TmCompiler::transitions`.
///
/// Source spans are kept in `TransitionInfo` rather than here, so hashing and
/// equality depend only on the destination state, the symbol and the
/// direction. Inserting a second transition with an equal key is what the
/// compiler reports as a "duplicate transition".
#[derive(Hash, Clone, Copy, PartialEq, Eq)]
struct Transition<'a> {
/// State the transition leads to (built from the parsed `to_state`).
pub state: State<'a>,
/// Symbol taken from the parsed `to_tape` value.
/// NOTE(review): presumably the symbol written to the tape — confirm.
pub symbol: Symbol<'a>,
/// Direction value taken from the parsed transition.
/// NOTE(review): presumably the head movement — confirm.
pub direction: Direction,
}
/// Source-location data stored as the map value next to a `Transition` key.
///
/// Both spans are copied into the public `TransitionTo` when the compiler
/// produces the final `Tm`.
struct TransitionInfo {
/// Span of the transition item; shown as "previously defined here" when a
/// later transition conflicts with or duplicates this one.
pub transition: Span,
/// Span forwarded to `TransitionTo::function`.
/// NOTE(review): presumably the span of the enclosing transition-function
/// definition — confirm against the code that computes `function`.
pub function: Span,
}
impl<'a> Tm<'a> { impl<'a> Tm<'a> {
pub fn compile( pub fn compile(
items: impl Iterator<Item = Spanned<ast::TopLevel<'a>>>, items: impl Iterator<Item = Spanned<ast::TopLevel<'a>>>,
@ -92,7 +103,7 @@ pub struct TmCompiler<'a, 'b> {
final_states: HashMap<State<'a>, StateInfo>, final_states: HashMap<State<'a>, StateInfo>,
final_states_def: Option<Span>, final_states_def: Option<Span>,
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>, transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
} }
impl<'a, 'b> TmCompiler<'a, 'b> { impl<'a, 'b> TmCompiler<'a, 'b> {
@ -178,7 +189,24 @@ impl<'a, 'b> TmCompiler<'a, 'b> {
states: self.states, states: self.states,
symbols: self.symbols, symbols: self.symbols,
final_states: self.final_states, final_states: self.final_states,
transitions: self.transitions, transitions: self
.transitions
.into_iter()
.map(|(k, v)| {
(
k,
v.into_iter()
.map(|(k, v)| TransitionTo {
direction: k.direction,
function: v.function,
state: k.state,
symbol: k.symbol,
transition: v.transition,
})
.collect(),
)
})
.collect(),
}) })
} }
@ -393,18 +421,26 @@ impl<'a, 'b> TmCompiler<'a, 'b> {
symbol: Symbol(from_tape.0), symbol: Symbol(from_tape.0),
}) })
.or_default(); .or_default();
if !entry.is_empty() && !self.options.non_deterministic { if let Some(entry) = entry.iter().next()
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1); && !self.options.non_deterministic
{
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
.emit_info("previously defined here", entry.1.transition);
} }
if !entry.insert(TransitionTo { if let Some(previous) = entry.insert(
state: State(to_state.0), Transition {
symbol: Symbol(to_tape.0), state: State(to_state.0),
direction: direction.0, symbol: Symbol(to_tape.0),
direction: direction.0,
function, },
transition: item.1, TransitionInfo {
}) { function,
self.ctx.emit_warning("duplicate transition", item.1); transition: item.1,
},
) {
self.ctx
.emit_warning("duplicate transition", item.1)
.emit_info("previously defined here", previous.transition);
} }
} }
} }

View file

@ -1,11 +1,10 @@
use crate::loader::{Span, Spanned}; use crate::loader::{Span, Spanned};
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default)] #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default)]
pub enum StringKind{ pub enum StringKind {
#[default] #[default]
Regular, Regular,
Regex Regex,
} }
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
@ -64,7 +63,11 @@ impl<'a> std::fmt::Display for Token<'a> {
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"), Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
Token::Ident(_) => write!(f, "ident"), Token::Ident(_) => write!(f, "ident"),
Token::String(string, kind, _) if f.alternate() => write!(f, "{}{string:?}", if *kind==StringKind::Regex {"r"} else {""}), Token::String(string, kind, _) if f.alternate() => write!(
f,
"{}{string:?}",
if *kind == StringKind::Regex { "r" } else { "" }
),
Token::String(_, _, _) => write!(f, "string"), Token::String(_, _, _) => write!(f, "string"),
Token::LineEnd => write!(f, "eol"), Token::LineEnd => write!(f, "eol"),
@ -169,12 +172,18 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
let mut escaped = false; let mut escaped = false;
loop { loop {
match self.consume() { match self.consume() {
Some('"') => break Ok(Token::String(&self.input[start+1..self.position], StringKind::Regular, escaped)), Some('"') => {
break Ok(Token::String(
&self.input[start + 1..self.position],
StringKind::Regular,
escaped,
));
}
None => break Err(Error::UnclosedString), None => break Err(Error::UnclosedString),
Some('\\') => { Some('\\') => {
_ = self.consume(); _ = self.consume();
escaped = true; escaped = true;
}, }
_ => {} _ => {}
} }
} }

View file

@ -141,19 +141,26 @@ impl<'a, 'b> Parser<'a, 'b> {
S(Tuple(items), start.join(end)) S(Tuple(items), start.join(end))
} }
fn parse_as_string(&mut self, tok: S<T<'a>>) -> S<Cow<'a, str>>{ fn parse_as_string(&mut self, tok: S<T<'a>>) -> S<Cow<'a, str>> {
let (r, k, e, s) = match tok { let (r, k, e, s) = match tok {
S(T::String(r, k, e), s) => (r, k, e, s), S(T::String(r, k, e), s) => (r, k, e, s),
S(t, s) => { S(t, s) => {
self.ctx.emit_error(format!("unexpected {:#} expected {:}", t, T::String("", Default::default(), false)), s); self.ctx.emit_error(
return S("<INVALID>".into(), s) format!(
"unexpected {:#} expected {:}",
t,
T::String("", Default::default(), false)
),
s,
);
return S("<INVALID>".into(), s);
} }
}; };
S(r.into(), s) S(r.into(), s)
} }
fn parse_string(&mut self) -> S<Cow<'a, str>>{ fn parse_string(&mut self) -> S<Cow<'a, str>> {
let tok = self.next_token(); let tok = self.next_token();
self.parse_as_string(tok) self.parse_as_string(tok)
} }
@ -246,7 +253,7 @@ impl<'a, 'b> Parser<'a, 'b> {
todo!() todo!()
} }
fn parse_as_production_unit(&mut self, tok: S<T<'a>>) -> S<ProductionUnit<'a>>{ fn parse_as_production_unit(&mut self, tok: S<T<'a>>) -> S<ProductionUnit<'a>> {
match tok { match tok {
S(T::Tilde, r) => S(ProductionUnit::Epsilon("~"), r), S(T::Tilde, r) => S(ProductionUnit::Epsilon("~"), r),
S(T::Ident(repr @ epsilon!(pat)), r) => S(ProductionUnit::Epsilon(repr), r), S(T::Ident(repr @ epsilon!(pat)), r) => S(ProductionUnit::Epsilon(repr), r),
@ -266,15 +273,17 @@ impl<'a, 'b> Parser<'a, 'b> {
S(ProductionUnit::Ident("<INVALID>"), span) S(ProductionUnit::Ident("<INVALID>"), span)
} }
} }
} }
fn parse_production_unit(&mut self) -> S<ProductionUnit<'a>>{ fn parse_production_unit(&mut self) -> S<ProductionUnit<'a>> {
let tok = self.next_token(); let tok = self.next_token();
self.parse_as_production_unit(tok) self.parse_as_production_unit(tok)
} }
fn parse_production_rule(&mut self, S(sym, start): S<ProductionUnit<'a>>) -> Option<S<TopLevel<'a>>> { fn parse_production_rule(
&mut self,
S(sym, start): S<ProductionUnit<'a>>,
) -> Option<S<TopLevel<'a>>> {
let mut lhs_group = ProductionGroup(vec![S(sym, start)]); let mut lhs_group = ProductionGroup(vec![S(sym, start)]);
let mut lhs_group_end = start; let mut lhs_group_end = start;
while !matches!(self.peek_token().0, T::LSmallArrow | T::LineEnd) { while !matches!(self.peek_token().0, T::LSmallArrow | T::LineEnd) {

View file

@ -1,12 +1,9 @@
use std::collections::HashMap; use std::collections::HashMap;
use automata::{ use automata::{
delta_lower, epsilon, gamma_upper, automatan::{fa::Fa, pda::Pda, tm::Tm}, delta_lower, epsilon, gamma_upper, loader::{self, Context, Machine, Span, Spanned, lexer::Lexer}, sigma_upper
loader::{self, Context, Span, Spanned, lexer::Lexer},
sigma_upper,
}; };
use serde::Serialize;
use wasm_bindgen::prelude::wasm_bindgen; use wasm_bindgen::prelude::wasm_bindgen;
#[wasm_bindgen] #[wasm_bindgen]
@ -144,14 +141,6 @@ pub struct CompileLog {
pub end: Option<usize>, pub end: Option<usize>,
} }
#[derive(Serialize, Debug)]
pub struct Graph<'a> {
initial: &'a str,
final_states: Vec<&'a str>,
states: Vec<&'a str>,
transitions: HashMap<String, String>,
}
#[wasm_bindgen(getter_with_clone)] #[wasm_bindgen(getter_with_clone)]
pub struct CompileResult { pub struct CompileResult {
pub log: Vec<CompileLog>, pub log: Vec<CompileLog>,
@ -159,12 +148,67 @@ pub struct CompileResult {
pub machine: Option<String>, pub machine: Option<String>,
} }
/// In-place rewriting of the source spans stored inside a compiled machine.
///
/// `compile` uses this to translate every span into UTF-16 code-unit offsets
/// before the machine is serialized to JSON.
trait FixupSpan{
/// Replaces each span held by `self` with the value `func` returns for it.
///
/// `func` is `FnMut`, so it may carry state; the implementations in this
/// file call it once per stored span.
fn fixup(&mut self, func: impl FnMut(Span) -> Span);
}
impl FixupSpan for Machine<'_> {
    /// Hands `func` to the concrete automaton wrapped by this machine so that
    /// it can rewrite its own spans.
    fn fixup(&mut self, func: impl FnMut(Span) -> Span) {
        match self {
            Self::Fa(automaton) => automaton.fixup(func),
            Self::Pda(automaton) => automaton.fixup(func),
            Self::Tm(automaton) => automaton.fixup(func),
        }
    }
}
impl FixupSpan for Fa<'_> {
    /// Rewrites every span stored in the finite automaton.
    ///
    /// `func` is applied, in this order, to the definition span of each
    /// alphabet entry, each state and each final state, and then to the
    /// `transition` and `function` spans of every transition target.
    fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
        for info in self.alphabet.values_mut() {
            info.definition = func(info.definition);
        }
        for info in self.states.values_mut() {
            info.definition = func(info.definition);
        }
        for info in self.final_states.values_mut() {
            info.definition = func(info.definition);
        }
        for targets in self.transitions.values_mut() {
            for target in targets.iter_mut() {
                target.transition = func(target.transition);
                target.function = func(target.function);
            }
        }
    }
}
impl FixupSpan for Pda<'_> {
    /// Rewrites every span stored in the pushdown automaton.
    ///
    /// `func` is applied, in this order, to the definition span of each
    /// alphabet entry, state and stack symbol, then to each final state (when
    /// a final-state map is present), and finally to the `transition` and
    /// `function` spans of every transition target.
    fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
        for info in self.alphabet.values_mut() {
            info.definition = func(info.definition);
        }
        for info in self.states.values_mut() {
            info.definition = func(info.definition);
        }
        for info in self.symbols.values_mut() {
            info.definition = func(info.definition);
        }
        // `final_states` is optional; when it is `None` there is nothing to
        // rewrite, so no placeholder map is built just to iterate over it.
        if let Some(finals) = self.final_states.as_mut() {
            for info in finals.values_mut() {
                info.definition = func(info.definition);
            }
        }
        for targets in self.transitions.values_mut() {
            for target in targets.iter_mut() {
                target.transition = func(target.transition);
                target.function = func(target.function);
            }
        }
    }
}
impl FixupSpan for Tm<'_> {
    /// Rewrites every span stored in the Turing machine.
    ///
    /// `func` is applied, in this order, to the definition span of each
    /// state, each symbol and each final state, and then to the `transition`
    /// and `function` spans of every transition target.
    fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
        for info in self.states.values_mut() {
            info.definition = func(info.definition);
        }
        for info in self.symbols.values_mut() {
            info.definition = func(info.definition);
        }
        for info in self.final_states.values_mut() {
            info.definition = func(info.definition);
        }
        for targets in self.transitions.values_mut() {
            for target in targets.iter_mut() {
                target.transition = func(target.transition);
                target.function = func(target.function);
            }
        }
    }
}
#[wasm_bindgen] #[wasm_bindgen]
pub fn compile(input: &str) -> CompileResult { pub fn compile(input: &str) -> CompileResult {
let mut ctx = Context::new(input); let mut ctx = Context::new(input);
let result = automata::loader::parse_universal(&mut ctx); let result = automata::loader::parse_universal(&mut ctx);
let machine = result.map(|result| serde_json::to_string(&result).unwrap()); let machine = result.map(|mut result| {
result.fixup(|span|Span(input[..span.0].chars().map(char::len_utf16).sum(), input[..span.1].chars().map(char::len_utf16).sum()));
serde_json::to_string(&result).unwrap()
});
use std::fmt::Write; use std::fmt::Write;
let ansi_log = ctx.logs_display().fold(String::new(), |mut s, e| { let ansi_log = ctx.logs_display().fold(String::new(), |mut s, e| {
@ -185,10 +229,10 @@ pub fn compile(input: &str) -> CompileResult {
message: e.message, message: e.message,
start: e start: e
.span .span
.map(|span| input[..span.0].chars().map(char::len_utf16).count()), .map(|span| input[..span.0].chars().map(char::len_utf16).sum()),
end: e end: e
.span .span
.map(|span| input[..span.1].chars().map(char::len_utf16).count()), .map(|span| input[..span.1].chars().map(char::len_utf16).sum()),
}) })
.collect(); .collect();