mirror of
https://github.com/ParkerTenBroeck/automata.git
synced 2026-06-07 05:28:45 -04:00
utf16 index correction
This commit is contained in:
parent
bffa67069d
commit
d9e291e0ff
6 changed files with 240 additions and 80 deletions
|
|
@ -1,5 +1,3 @@
|
|||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::{
|
||||
|
|
@ -32,6 +30,16 @@ dual_struct_serde! {
|
|||
}
|
||||
}
|
||||
|
||||
/// Destination of a finite-automaton transition, used as the key of the inner
/// map in `FaCompiler::transitions`.
///
/// Only the data that identifies the transition is stored (and therefore
/// hashed / compared) here; the source spans live in `TransitionInfo`, so
/// inserting an equal destination twice for one `TransitionFrom` is detected
/// and reported as a "duplicate transition".
#[derive(Hash, Clone, Copy, PartialEq, Eq)]
|
||||
struct Transition<'a> {
|
||||
/// State the transition leads to.
pub state: State<'a>,
|
||||
}
|
||||
|
||||
/// Source locations recorded for one `Transition`; stored as the map value
/// next to its key and copied into the public `TransitionTo` when the
/// automaton is built.
struct TransitionInfo {
|
||||
/// Span of the transition item itself; shown as "previously defined here"
/// in the duplicate / non-determinism diagnostics.
pub transition: Span,
|
||||
/// Span taken from the `function` binding at the insertion site.
/// NOTE(review): presumably the enclosing transition-function definition — confirm.
pub function: Span,
|
||||
}
|
||||
|
||||
dual_struct_serde! { {#[serde_with::serde_as]}
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Fa<'a> {
|
||||
|
|
@ -49,7 +57,7 @@ dual_struct_serde! { {#[serde_with::serde_as]}
|
|||
|
||||
#[serde(borrow)]
|
||||
#[serde_as(as = "serde_with::Seq<(_, _)>")]
|
||||
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
|
||||
pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +86,7 @@ pub struct FaCompiler<'a, 'b> {
|
|||
final_states: HashMap<State<'a>, StateInfo>,
|
||||
final_states_def: Option<Span>,
|
||||
|
||||
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
|
||||
transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
|
||||
}
|
||||
|
||||
impl<'a, 'b> FaCompiler<'a, 'b> {
|
||||
|
|
@ -160,7 +168,22 @@ impl<'a, 'b> FaCompiler<'a, 'b> {
|
|||
states: self.states,
|
||||
alphabet: self.alphabet,
|
||||
final_states: self.final_states,
|
||||
transitions: self.transitions,
|
||||
transitions: self
|
||||
.transitions
|
||||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
k,
|
||||
v.into_iter()
|
||||
.map(|(k, v)| TransitionTo {
|
||||
function: v.function,
|
||||
state: k.state,
|
||||
transition: v.transition,
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -365,13 +388,17 @@ impl<'a, 'b> FaCompiler<'a, 'b> {
|
|||
&& !self.options.non_deterministic
|
||||
{
|
||||
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
|
||||
.emit_info("previously defined here", entry.transition);
|
||||
.emit_info("previously defined here", entry.1.transition);
|
||||
}
|
||||
if let Some(previous) = entry.replace(TransitionTo {
|
||||
if let Some(previous) = entry.insert(
|
||||
Transition {
|
||||
state: State(next_state.0),
|
||||
},
|
||||
TransitionInfo {
|
||||
function,
|
||||
transition: item.1,
|
||||
}) {
|
||||
},
|
||||
) {
|
||||
self.ctx
|
||||
.emit_warning("duplicate transition", item.1)
|
||||
.emit_info("previously defined here", previous.transition);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::{
|
||||
|
|
@ -37,6 +35,17 @@ dual_struct_serde! {
|
|||
}
|
||||
}
|
||||
|
||||
/// Destination of a pushdown-automaton transition, used as the key of the
/// inner map in `PdaCompiler::transitions`.
///
/// Only the identifying data is stored (and hashed / compared) here; source
/// spans live in `TransitionInfo`, so inserting an equal destination twice for
/// one `TransitionFrom` is reported as a "duplicate transition".
/// Unlike the other fields, `stack` is a `Vec`, which is why this type is
/// `Clone` but not `Copy`.
#[derive(Hash, Clone, PartialEq, Eq)]
|
||||
struct Transition<'a> {
|
||||
/// State the transition leads to.
pub state: State<'a>,
|
||||
/// Stack symbols attached to the transition.
/// NOTE(review): presumably the symbols pushed in place of the popped one — confirm.
pub stack: Vec<Symbol<'a>>,
|
||||
}
|
||||
|
||||
/// Source locations recorded for one `Transition`; stored as the map value
/// next to its key and copied into the public `TransitionTo` when the
/// automaton is built.
struct TransitionInfo {
|
||||
/// Span of the transition item itself; shown as "previously defined here"
/// in the duplicate / non-determinism diagnostics.
pub transition: Span,
|
||||
/// Span taken from the `function` binding at the insertion site.
/// NOTE(review): presumably the enclosing transition-function definition — confirm.
pub function: Span,
|
||||
}
|
||||
|
||||
dual_struct_serde! { {#[serde_with::serde_as]}
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Pda<'a> {
|
||||
|
|
@ -56,7 +65,7 @@ dual_struct_serde! { {#[serde_with::serde_as]}
|
|||
|
||||
#[serde(borrow)]
|
||||
#[serde_as(as = "serde_with::Seq<(_, _)>")]
|
||||
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
|
||||
pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +95,7 @@ pub struct PdaCompiler<'a, 'b> {
|
|||
final_states: HashMap<State<'a>, StateInfo>,
|
||||
final_states_def: Option<Span>,
|
||||
|
||||
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
|
||||
transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
|
||||
}
|
||||
|
||||
impl<'a> Pda<'a> {
|
||||
|
|
@ -180,7 +189,9 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
|
|||
self.ctx
|
||||
.emit_error_locless("final states never defined")
|
||||
.emit_help_logless("add: F = {...}");
|
||||
}else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) = (self.accept_by, self.final_states_def){
|
||||
} else if let (Some((AcceptBy::EmptyStack, empty)), Some(states)) =
|
||||
(self.accept_by, self.final_states_def)
|
||||
{
|
||||
self.ctx
|
||||
.emit_error_locless("final states defined alongside accept by empty stack")
|
||||
.emit_help("either remote to accept by empty stack", states)
|
||||
|
|
@ -245,7 +256,23 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
|
|||
symbols: self.symbols,
|
||||
alphabet: self.alphabet,
|
||||
final_states,
|
||||
transitions: self.transitions,
|
||||
transitions: self
|
||||
.transitions
|
||||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
k,
|
||||
v.into_iter()
|
||||
.map(|(k, v)| TransitionTo {
|
||||
function: v.function,
|
||||
state: k.state,
|
||||
stack: k.stack,
|
||||
transition: v.transition,
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -554,17 +581,25 @@ impl<'a, 'b> PdaCompiler<'a, 'b> {
|
|||
symbol: Symbol(stack_symbol.0),
|
||||
})
|
||||
.or_default();
|
||||
if !entry.is_empty() && !self.options.non_deterministic {
|
||||
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1);
|
||||
if let Some(entry) = entry.iter().next()
|
||||
&& !self.options.non_deterministic
|
||||
{
|
||||
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
|
||||
.emit_info("previously defined here", entry.1.transition);
|
||||
}
|
||||
if !entry.insert(TransitionTo {
|
||||
if let Some(previous) = entry.insert(
|
||||
Transition {
|
||||
state: State(next_state.0),
|
||||
stack,
|
||||
|
||||
},
|
||||
TransitionInfo {
|
||||
function,
|
||||
transition: item.1,
|
||||
}) {
|
||||
self.ctx.emit_warning("duplicate transition", item.1);
|
||||
},
|
||||
) {
|
||||
self.ctx
|
||||
.emit_warning("duplicate transition", item.1)
|
||||
.emit_info("previously defined here", previous.transition);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::{
|
||||
delta_lower, dual_struct_serde, gamma_upper, loader::{
|
||||
delta_lower, dual_struct_serde, gamma_upper,
|
||||
loader::{
|
||||
BLANK_SYMBOL, Context, INITIAL_STATE, Spanned,
|
||||
ast::{self, Symbol as Sym},
|
||||
log::LogSink,
|
||||
}
|
||||
},
|
||||
};
|
||||
dual_struct_serde! {
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
|
||||
|
|
@ -62,10 +61,22 @@ dual_struct_serde! {{#[serde_with::serde_as]}
|
|||
|
||||
#[serde(borrow)]
|
||||
#[serde_as(as = "serde_with::Seq<(_, _)>")]
|
||||
pub transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
|
||||
pub transitions: HashMap<TransitionFrom<'a>, Vec<TransitionTo<'a>>>,
|
||||
}
|
||||
}
|
||||
|
||||
/// Destination of a Turing-machine transition, used as the key of the inner
/// map in `TmCompiler::transitions`.
///
/// Only the identifying data is stored (and hashed / compared) here; source
/// spans live in `TransitionInfo`, so inserting an equal destination twice for
/// one `TransitionFrom` is reported as a "duplicate transition".
#[derive(Hash, Clone, Copy, PartialEq, Eq)]
|
||||
struct Transition<'a> {
|
||||
/// State the transition leads to.
pub state: State<'a>,
|
||||
/// Tape symbol of the destination (built from `to_tape` at the insertion site).
/// NOTE(review): presumably the symbol written to the tape — confirm.
pub symbol: Symbol<'a>,
|
||||
/// Direction associated with the transition.
/// NOTE(review): presumably the head movement — confirm.
pub direction: Direction,
|
||||
}
|
||||
|
||||
/// Source locations recorded for one `Transition`; stored as the map value
/// next to its key and copied into the public `TransitionTo` when the
/// machine is built.
struct TransitionInfo {
|
||||
/// Span of the transition item itself; shown as "previously defined here"
/// in the duplicate / non-determinism diagnostics.
pub transition: Span,
|
||||
/// Span taken from the `function` binding at the insertion site.
/// NOTE(review): presumably the enclosing transition-function definition — confirm.
pub function: Span,
|
||||
}
|
||||
|
||||
impl<'a> Tm<'a> {
|
||||
pub fn compile(
|
||||
items: impl Iterator<Item = Spanned<ast::TopLevel<'a>>>,
|
||||
|
|
@ -92,7 +103,7 @@ pub struct TmCompiler<'a, 'b> {
|
|||
final_states: HashMap<State<'a>, StateInfo>,
|
||||
final_states_def: Option<Span>,
|
||||
|
||||
transitions: HashMap<TransitionFrom<'a>, HashSet<TransitionTo<'a>>>,
|
||||
transitions: HashMap<TransitionFrom<'a>, HashMap<Transition<'a>, TransitionInfo>>,
|
||||
}
|
||||
|
||||
impl<'a, 'b> TmCompiler<'a, 'b> {
|
||||
|
|
@ -178,7 +189,24 @@ impl<'a, 'b> TmCompiler<'a, 'b> {
|
|||
states: self.states,
|
||||
symbols: self.symbols,
|
||||
final_states: self.final_states,
|
||||
transitions: self.transitions,
|
||||
transitions: self
|
||||
.transitions
|
||||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
k,
|
||||
v.into_iter()
|
||||
.map(|(k, v)| TransitionTo {
|
||||
direction: k.direction,
|
||||
function: v.function,
|
||||
state: k.state,
|
||||
symbol: k.symbol,
|
||||
transition: v.transition,
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -393,18 +421,26 @@ impl<'a, 'b> TmCompiler<'a, 'b> {
|
|||
symbol: Symbol(from_tape.0),
|
||||
})
|
||||
.or_default();
|
||||
if !entry.is_empty() && !self.options.non_deterministic {
|
||||
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1);
|
||||
if let Some(entry) = entry.iter().next()
|
||||
&& !self.options.non_deterministic
|
||||
{
|
||||
self.ctx.emit_error("transition already defined for this starting point (non determinism not permitted)", item.1)
|
||||
.emit_info("previously defined here", entry.1.transition);
|
||||
}
|
||||
if !entry.insert(TransitionTo {
|
||||
if let Some(previous) = entry.insert(
|
||||
Transition {
|
||||
state: State(to_state.0),
|
||||
symbol: Symbol(to_tape.0),
|
||||
direction: direction.0,
|
||||
|
||||
},
|
||||
TransitionInfo {
|
||||
function,
|
||||
transition: item.1,
|
||||
}) {
|
||||
self.ctx.emit_warning("duplicate transition", item.1);
|
||||
},
|
||||
) {
|
||||
self.ctx
|
||||
.emit_warning("duplicate transition", item.1)
|
||||
.emit_info("previously defined here", previous.transition);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
use crate::loader::{Span, Spanned};
|
||||
|
||||
|
||||
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default)]
|
||||
pub enum StringKind{
|
||||
pub enum StringKind {
|
||||
#[default]
|
||||
Regular,
|
||||
Regex
|
||||
Regex,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
|
||||
|
|
@ -64,7 +63,11 @@ impl<'a> std::fmt::Display for Token<'a> {
|
|||
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
|
||||
Token::Ident(_) => write!(f, "ident"),
|
||||
|
||||
Token::String(string, kind, _) if f.alternate() => write!(f, "{}{string:?}", if *kind==StringKind::Regex {"r"} else {""}),
|
||||
Token::String(string, kind, _) if f.alternate() => write!(
|
||||
f,
|
||||
"{}{string:?}",
|
||||
if *kind == StringKind::Regex { "r" } else { "" }
|
||||
),
|
||||
Token::String(_, _, _) => write!(f, "string"),
|
||||
|
||||
Token::LineEnd => write!(f, "eol"),
|
||||
|
|
@ -169,12 +172,18 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
|
|||
let mut escaped = false;
|
||||
loop {
|
||||
match self.consume() {
|
||||
Some('"') => break Ok(Token::String(&self.input[start+1..self.position], StringKind::Regular, escaped)),
|
||||
Some('"') => {
|
||||
break Ok(Token::String(
|
||||
&self.input[start + 1..self.position],
|
||||
StringKind::Regular,
|
||||
escaped,
|
||||
));
|
||||
}
|
||||
None => break Err(Error::UnclosedString),
|
||||
Some('\\') => {
|
||||
_ = self.consume();
|
||||
escaped = true;
|
||||
},
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,19 +141,26 @@ impl<'a, 'b> Parser<'a, 'b> {
|
|||
S(Tuple(items), start.join(end))
|
||||
}
|
||||
|
||||
fn parse_as_string(&mut self, tok: S<T<'a>>) -> S<Cow<'a, str>>{
|
||||
fn parse_as_string(&mut self, tok: S<T<'a>>) -> S<Cow<'a, str>> {
|
||||
let (r, k, e, s) = match tok {
|
||||
S(T::String(r, k, e), s) => (r, k, e, s),
|
||||
S(t, s) => {
|
||||
self.ctx.emit_error(format!("unexpected {:#} expected {:}", t, T::String("", Default::default(), false)), s);
|
||||
return S("<INVALID>".into(), s)
|
||||
self.ctx.emit_error(
|
||||
format!(
|
||||
"unexpected {:#} expected {:}",
|
||||
t,
|
||||
T::String("", Default::default(), false)
|
||||
),
|
||||
s,
|
||||
);
|
||||
return S("<INVALID>".into(), s);
|
||||
}
|
||||
};
|
||||
|
||||
S(r.into(), s)
|
||||
}
|
||||
|
||||
fn parse_string(&mut self) -> S<Cow<'a, str>>{
|
||||
fn parse_string(&mut self) -> S<Cow<'a, str>> {
|
||||
let tok = self.next_token();
|
||||
self.parse_as_string(tok)
|
||||
}
|
||||
|
|
@ -246,7 +253,7 @@ impl<'a, 'b> Parser<'a, 'b> {
|
|||
todo!()
|
||||
}
|
||||
|
||||
fn parse_as_production_unit(&mut self, tok: S<T<'a>>) -> S<ProductionUnit<'a>>{
|
||||
fn parse_as_production_unit(&mut self, tok: S<T<'a>>) -> S<ProductionUnit<'a>> {
|
||||
match tok {
|
||||
S(T::Tilde, r) => S(ProductionUnit::Epsilon("~"), r),
|
||||
S(T::Ident(repr @ epsilon!(pat)), r) => S(ProductionUnit::Epsilon(repr), r),
|
||||
|
|
@ -266,15 +273,17 @@ impl<'a, 'b> Parser<'a, 'b> {
|
|||
S(ProductionUnit::Ident("<INVALID>"), span)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fn parse_production_unit(&mut self) -> S<ProductionUnit<'a>>{
|
||||
fn parse_production_unit(&mut self) -> S<ProductionUnit<'a>> {
|
||||
let tok = self.next_token();
|
||||
self.parse_as_production_unit(tok)
|
||||
}
|
||||
|
||||
fn parse_production_rule(&mut self, S(sym, start): S<ProductionUnit<'a>>) -> Option<S<TopLevel<'a>>> {
|
||||
fn parse_production_rule(
|
||||
&mut self,
|
||||
S(sym, start): S<ProductionUnit<'a>>,
|
||||
) -> Option<S<TopLevel<'a>>> {
|
||||
let mut lhs_group = ProductionGroup(vec![S(sym, start)]);
|
||||
let mut lhs_group_end = start;
|
||||
while !matches!(self.peek_token().0, T::LSmallArrow | T::LineEnd) {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,9 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use automata::{
|
||||
delta_lower, epsilon, gamma_upper,
|
||||
loader::{self, Context, Span, Spanned, lexer::Lexer},
|
||||
sigma_upper,
|
||||
automatan::{fa::Fa, pda::Pda, tm::Tm}, delta_lower, epsilon, gamma_upper, loader::{self, Context, Machine, Span, Spanned, lexer::Lexer}, sigma_upper
|
||||
};
|
||||
|
||||
use serde::Serialize;
|
||||
use wasm_bindgen::prelude::wasm_bindgen;
|
||||
|
||||
#[wasm_bindgen]
|
||||
|
|
@ -144,14 +141,6 @@ pub struct CompileLog {
|
|||
pub end: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct Graph<'a> {
|
||||
initial: &'a str,
|
||||
final_states: Vec<&'a str>,
|
||||
states: Vec<&'a str>,
|
||||
transitions: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[wasm_bindgen(getter_with_clone)]
|
||||
pub struct CompileResult {
|
||||
pub log: Vec<CompileLog>,
|
||||
|
|
@ -159,12 +148,67 @@ pub struct CompileResult {
|
|||
pub machine: Option<String>,
|
||||
}
|
||||
|
||||
/// Rewrites the `Span`s stored inside a compiled machine.
///
/// `compile` uses this to turn the byte offsets produced by the loader into
/// UTF-16 code-unit offsets before the machine is serialized.
trait FixupSpan{
|
||||
/// Replaces each stored span `s` with `func(s)`.
fn fixup(&mut self, func: impl FnMut(Span) -> Span);
|
||||
}
|
||||
|
||||
impl<'a> FixupSpan for Machine<'a>{
|
||||
fn fixup(&mut self, func: impl FnMut(Span) -> Span) {
|
||||
match self{
|
||||
Machine::Fa(fa) => fa.fixup(func),
|
||||
Machine::Pda(pda) => pda.fixup(func),
|
||||
Machine::Tm(tm) => tm.fixup(func),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> FixupSpan for Fa<'a>{
|
||||
fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
|
||||
self.alphabet.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.states.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.final_states.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.transitions.values_mut().flat_map(|v|v.iter_mut()).for_each(|e|{
|
||||
e.transition = func(e.transition);
|
||||
e.function = func(e.function);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FixupSpan for Pda<'a>{
|
||||
fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
|
||||
self.alphabet.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.states.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.symbols.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.final_states.as_mut().unwrap_or(&mut HashMap::new()).values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.transitions.values_mut().flat_map(|v|v.iter_mut()).for_each(|e|{
|
||||
e.transition = func(e.transition);
|
||||
e.function = func(e.function);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FixupSpan for Tm<'a>{
|
||||
fn fixup(&mut self, mut func: impl FnMut(Span) -> Span) {
|
||||
self.states.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.symbols.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.final_states.values_mut().for_each(|v| v.definition = func(v.definition));
|
||||
self.transitions.values_mut().flat_map(|v|v.iter_mut()).for_each(|e|{
|
||||
e.transition = func(e.transition);
|
||||
e.function = func(e.function);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub fn compile(input: &str) -> CompileResult {
|
||||
let mut ctx = Context::new(input);
|
||||
let result = automata::loader::parse_universal(&mut ctx);
|
||||
|
||||
let machine = result.map(|result| serde_json::to_string(&result).unwrap());
|
||||
let machine = result.map(|mut result| {
|
||||
result.fixup(|span|Span(input[..span.0].chars().map(char::len_utf16).sum(), input[..span.1].chars().map(char::len_utf16).sum()));
|
||||
serde_json::to_string(&result).unwrap()
|
||||
});
|
||||
|
||||
use std::fmt::Write;
|
||||
let ansi_log = ctx.logs_display().fold(String::new(), |mut s, e| {
|
||||
|
|
@ -185,10 +229,10 @@ pub fn compile(input: &str) -> CompileResult {
|
|||
message: e.message,
|
||||
start: e
|
||||
.span
|
||||
.map(|span| input[..span.0].chars().map(char::len_utf16).count()),
|
||||
.map(|span| input[..span.0].chars().map(char::len_utf16).sum()),
|
||||
end: e
|
||||
.span
|
||||
.map(|span| input[..span.1].chars().map(char::len_utf16).count()),
|
||||
.map(|span| input[..span.1].chars().map(char::len_utf16).sum()),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue