From 6b2ad8112ebdb1e9514999797653014209675d50 Mon Sep 17 00:00:00 2001 From: Parker TenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Tue, 13 Jan 2026 12:52:08 -0500 Subject: [PATCH] added strings to language, adding more examples/tutorials --- automata/src/loader/ast.rs | 17 ++++++- automata/src/loader/lexer.rs | 34 ++++++++++++- automata/src/loader/parser.rs | 61 ++++++++++++++++++++--- web/root/src/examples.ts | 94 ++++++++++++++++++++++++++++++++++- web_lib/src/lib.rs | 3 ++ 5 files changed, 198 insertions(+), 11 deletions(-) diff --git a/automata/src/loader/ast.rs b/automata/src/loader/ast.rs index a70ed32..8e219a1 100644 --- a/automata/src/loader/ast.rs +++ b/automata/src/loader/ast.rs @@ -1,4 +1,4 @@ -use std::ops::Range; +use std::{borrow::Cow, ops::Range}; use super::Spanned; @@ -23,6 +23,7 @@ pub enum Symbol<'a> { #[derive(Clone, Debug)] pub enum Item<'a> { Symbol(Symbol<'a>), + String(Cow<'a, str>), Tuple(Tuple<'a>), List(List<'a>), } @@ -46,7 +47,14 @@ pub enum Regex<'a> { pub struct List<'a>(pub Vec>>, pub ListKind); #[derive(Clone, Debug)] -pub struct ProductionGroup<'a>(pub Vec>>); +pub enum ProductionUnit<'a> { + Epsilon(&'a str), + Ident(&'a str), + String(Cow<'a, str>), +} + +#[derive(Clone, Debug)] +pub struct ProductionGroup<'a>(pub Vec>>); #[derive(Clone, Debug)] pub enum TopLevel<'a> { @@ -70,6 +78,7 @@ impl<'a> Spanned> { Item::Symbol(sym) => return Some(*sym), Item::Tuple(_) => _ = ctx.emit_error("expected ident found tuple", self.1), Item::List(_) => _ = ctx.emit_error("expected ident found list", self.1), + Item::String(_) => _ = ctx.emit_error("expected ident found string", self.1), } None } @@ -82,6 +91,7 @@ impl<'a> Spanned> { } Item::Tuple(_) => _ = ctx.emit_error("expected ident found tuple", self.1), Item::List(_) => _ = ctx.emit_error("expected ident found list", self.1), + Item::String(_) => _ = ctx.emit_error("expected ident found string", self.1), } None } @@ -95,6 +105,7 @@ impl<'a> Spanned> { _ = ctx.emit_error("expected set found epsilon", self.1) } Item::Tuple(_) => _ = ctx.emit_error("expected set found tuple", self.1), + Item::String(_) => _ = ctx.emit_error("expected set found string", self.1), Item::List(list) => return Some(&list.0), } None @@ -109,6 +120,7 @@ impl<'a> Spanned> { _ = ctx.emit_error("expected list found epsilon", self.1) } Item::Tuple(_) => _ = ctx.emit_error("expected list found tuple", self.1), + Item::String(_) => _ = ctx.emit_error("expected list found string", self.1), Item::List(list) => return Some(&list.0), } None @@ -138,6 +150,7 @@ impl<'a> Spanned> { } Item::Tuple(tuple) => return Some(Spanned(tuple, self.1)), Item::List(_) => _ = ctx.emit_error("expected tuple found list", self.1), + Item::String(_) => _ = ctx.emit_error("expected tuple found string", self.1), } None } diff --git a/automata/src/loader/lexer.rs b/automata/src/loader/lexer.rs index c4bc999..956b525 100644 --- a/automata/src/loader/lexer.rs +++ b/automata/src/loader/lexer.rs @@ -1,5 +1,13 @@ use crate::loader::{Span, Spanned}; + +#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default)] +pub enum StringKind{ + #[default] + Regular, + Regex +} + #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] pub enum Token<'a> { LPar, @@ -14,6 +22,7 @@ pub enum Token<'a> { Tilde, Eq, Comma, + Dash, Or, Plus, @@ -26,6 +35,8 @@ pub enum Token<'a> { Comment(&'a str), Ident(&'a str), + + String(&'a str, StringKind, bool), LineEnd, } @@ -43,13 +54,19 @@ impl<'a> std::fmt::Display for Token<'a> { Token::Comma => write!(f, "','"), Token::Or => write!(f, "'|'"), Token::Plus => write!(f, "'+'"), + Token::Dash => write!(f, "'-'"), Token::Star => write!(f, "'*'"), Token::And => write!(f, "'&'"), Token::LSmallArrow => write!(f, "'->'"), Token::LBigArrow => write!(f, "'=>'"), Token::Comment(_) => write!(f, ""), + Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"), Token::Ident(_) => write!(f, "ident"), + + Token::String(string, kind, _) if f.alternate() => write!(f, "{}{string:?}", if *kind==StringKind::Regex {"r"} else {""}), + Token::String(_, _, _) => write!(f, "string"), + Token::LineEnd => write!(f, "eol"), } } @@ -65,6 +82,7 @@ pub struct Lexer<'a> { pub enum Error { InvalidChar(char), UnclosedMultiLine, + UnclosedString, } impl<'a> Lexer<'a> { @@ -145,8 +163,22 @@ impl<'a> std::iter::Iterator for Lexer<'a> { self.consume(); Ok(Token::LSmallArrow) } - _ => Err(Error::InvalidChar('-')), + _ => Ok(Token::Dash), }, + '"' => { + let mut escaped = false; + loop { + match self.consume() { + Some('"') => break Ok(Token::String(&self.input[start+1..self.position], StringKind::Regular, escaped)), + None => break Err(Error::UnclosedString), + Some('\\') => { + _ = self.consume(); + escaped = true; + }, + _ => {} + } + } + } '/' => match self.consume() { Some('/') => loop { diff --git a/automata/src/loader/parser.rs b/automata/src/loader/parser.rs index 2426339..c28df60 100644 --- a/automata/src/loader/parser.rs +++ b/automata/src/loader/parser.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use crate::epsilon; use crate::loader::log::LogSink; use crate::loader::{Context, Span}; @@ -139,19 +141,38 @@ impl<'a, 'b> Parser<'a, 'b> { S(Tuple(items), start.join(end)) } + fn parse_as_string(&mut self, tok: S>) -> S>{ + let (r, k, e, s) = match tok { + S(T::String(r, k, e), s) => (r, k, e, s), + S(t, s) => { + self.ctx.emit_error(format!("unexpected {:#} expected {:}", t, T::String("", Default::default(), false)), s); + return S("".into(), s) + } + }; + + S(r.into(), s) + } + + fn parse_string(&mut self) -> S>{ + let tok = self.next_token(); + self.parse_as_string(tok) + } + fn parse_item(&mut self) -> S> { match self.peek_token().0 { T::Ident(_) | T::Tilde => self.parse_symbol().map(Item::Symbol), + T::String(_, _, _) => self.parse_string().map(Item::String), T::LPar => self.parse_tupple().map(Item::Tuple), T::LBrace | T::LBracket => self.parse_list().map(Item::List), _ => { let S(got, span) = self.next_token(); self.ctx.emit_error( format!( - "unexpected {:#} expected item ( {:} | {:} | {:} | {:} | {:} )", + "unexpected {:#} expected item ( {:} | {:} | {:} | {:} | {:} | {:} )", got, T::Tilde, T::Ident(""), + T::String("", Default::default(), false), T::LPar, T::LBrace, T::LBracket, @@ -225,11 +246,39 @@ impl<'a, 'b> Parser<'a, 'b> { todo!() } - fn parse_production_rule(&mut self, S(sym, start): S>) -> Option>> { + fn parse_as_production_unit(&mut self, tok: S>) -> S>{ + match tok { + S(T::Tilde, r) => S(ProductionUnit::Epsilon("~"), r), + S(T::Ident(repr @ epsilon!(pat)), r) => S(ProductionUnit::Epsilon(repr), r), + S(T::Ident(ident), r) => S(ProductionUnit::Ident(ident), r), + S(T::String(_, _, _), _) => self.parse_as_string(tok).map(ProductionUnit::String), + S(got, span) => { + self.ctx.emit_error( + format!( + "unexpected {:#} expected production unit ( {:} | {:} | {:} )", + got, + T::Tilde, + T::Ident(""), + T::String("", Default::default(), false) + ), + span, + ); + S(ProductionUnit::Ident(""), span) + } + } + + } + + fn parse_production_unit(&mut self) -> S>{ + let tok = self.next_token(); + self.parse_as_production_unit(tok) + } + + fn parse_production_rule(&mut self, S(sym, start): S>) -> Option>> { let mut lhs_group = ProductionGroup(vec![S(sym, start)]); let mut lhs_group_end = start; while !matches!(self.peek_token().0, T::LSmallArrow | T::LineEnd) { - let sym = self.parse_symbol(); + let sym = self.parse_production_unit(); lhs_group_end = sym.1; lhs_group.0.push(sym); } @@ -248,7 +297,7 @@ impl<'a, 'b> Parser<'a, 'b> { loop { let mut group = ProductionGroup(vec![]); while !matches!(self.peek_token().0, T::LineEnd | T::Or) { - group.0.push(self.parse_symbol()); + group.0.push(self.parse_production_unit()); } if group.0.is_empty() { @@ -327,10 +376,10 @@ impl<'a, 'b> Parser<'a, 'b> { } // production rule ( - sym @ S(T::Ident(_) | T::Tilde, _), + sym @ S(T::Ident(_) | T::Tilde | T::String(_, _, _), _), S(T::LSmallArrow | T::Ident(_) | T::Tilde, _), ) => { - let sym = self.parse_as_symbol(sym); + let sym = self.parse_as_production_unit(sym); if let Some(pr) = self.parse_production_rule(sym) { break Some(pr); } diff --git a/web/root/src/examples.ts b/web/root/src/examples.ts index bf2304b..0afb418 100644 --- a/web/root/src/examples.ts +++ b/web/root/src/examples.ts @@ -7,7 +7,8 @@ export type Category = | "DPDA" | "NPDA" | "TM" - | "NTM"; + | "NTM" + | "CFG"; export class Example { readonly category: Category; @@ -48,6 +49,56 @@ d(qb, b) = qb d(qb', a) = qb' d(qb', b) = qb`, + ), + + new Example( + "Tutorial", + "NFA", + `// strings of 1's whos length is divisible by two or three and longer than 1 + +type = NFA // type of machine +Q = {q0, q2, q2f, q3, q3', q3f} // set of states +E = {1} // alphabet +F = {q2f, q3f} // set of final states +q0 = q0 // initial state + +// transition function (state, letter) -> state + +// non deterministic part +d(q0, 1) = q2 +d(q0, 1) = q3 + +d(q2, 1) = q2f +d(q2f, 1) = q2 + +d(q3, 1) = q3' +d(q3', 1) = q3f +d(q3f, 1) = q3 +`, + ), + + new Example( + "Tutorial", + "NFA w/ epsilon", + `// strings containing only all a's, or all b's, or all c's + +type = NFA // type of machine +Q = {q0, qa, qb, qc} // set of states +E = {a, b, c} // alphabet +F = {qa, qb, qc} // set of final states +q0 = q0 // initial state + +// transition function (state, letter) -> state + +// non deterministic part +d(q0, epsilon) = qa +d(q0, epsilon) = qb +d(q0, epsilon) = qc + +d(qa, a) = qa +d(qb, b) = qb +d(qc, c) = qc +`, ), new Example( @@ -127,10 +178,19 @@ d(q0, b, A) = (q0, [B A]) d(q0, a, B) = (q0, [A B]) d(q0, b, B) = (q0, [B B]) -// transition to q1 +// transition to q1 +// even d(q0, epsilon, z0) = { (q1, z0) } d(q0, epsilon, A) = { (q1, A) } d(q0, epsilon, B) = { (q1, B) } +// odd +d(q0, a, z0) = { (q1, z0) } +d(q0, a, A) = { (q1, A) } +d(q0, a, B) = { (q1, B) } + +d(q0, b, z0) = { (q1, z0) } +d(q0, b, A) = { (q1, A) } +d(q0, b, B) = { (q1, B) } // consume stack until empty d(q1, a, A) = { (q1, epsilon) } @@ -186,6 +246,35 @@ d(q2,X)=(q0,x,R) d(q0,Y)=(q3,y,R) d(q3,Y)=(q3,y,R) d(q3,B)=(q4,B,R) +`), + + new Example("CFG", "definition", + `// CFG's aren't supported yet, and this definition is not complete. +// This is the definition for the grammar the definition has itself + +type=CFG + +S -> TopLevel | TopLevel S + +TopLevel -> Ident "=" Item // Item +TopLevel -> Ident Tuple "=" Item // Transition Functions +TopLevel -> Production | Table + +Item -> Symbol | String | Tuple | List + +Symbol -> Ident | "~" +String -> "\"" "\"" +Tuple -> "(" ItemList ")" +List -> "{" ItemList "}" | "[" ItemList "]" + +ItemList -> ~ | Item ItemList | Item "," ItemList + +Production -> ProductionGroup "->" ProductionGroupList +ProductionGroupList -> ProductionGroup | ProductionGroupList "|" ProductionGroup +ProductionGroup -> ProductionUnit | ProductionGroup ProductionUnit +ProductionUnit -> Ident | "~" | String + + `) ]; @@ -197,6 +286,7 @@ const CATEGORY_ORDER: Category[] = [ "NPDA", "TM", "NTM", + "CFG", ]; function buildExamplesDropdown( diff --git a/web_lib/src/lib.rs b/web_lib/src/lib.rs index f38855d..e861b20 100644 --- a/web_lib/src/lib.rs +++ b/web_lib/src/lib.rs @@ -19,6 +19,7 @@ pub fn init() { pub enum Kind { Ident = "ident", Keyword = "keyword", + String = "string", Error = "error", Comment = "comment", Punc = "punc", @@ -77,11 +78,13 @@ pub fn lex(input: &str) -> Vec { Token::Comma => Kind::Punc, Token::Or => Kind::Punc, Token::Plus => Kind::Punc, + Token::Dash => Kind::Punc, Token::Star => Kind::Punc, Token::And => Kind::Punc, Token::LSmallArrow => Kind::Punc, Token::LBigArrow => Kind::Punc, Token::Comment(_) => Kind::Comment, + Token::String(_, _, _) => Kind::String, Token::Ident(_) if input[..start_utf8] .split("\n")