Add strings to the language; add more examples/tutorials

This commit is contained in:
Parker TenBroeck 2026-01-13 12:52:08 -05:00
parent bb8829833c
commit 6b2ad8112e
5 changed files with 198 additions and 11 deletions

View file

@ -1,4 +1,4 @@
use std::ops::Range;
use std::{borrow::Cow, ops::Range};
use super::Spanned;
@ -23,6 +23,7 @@ pub enum Symbol<'a> {
/// A single parsed item: a symbol, a string literal, a tuple, or a list.
#[derive(Clone, Debug)]
pub enum Item<'a> {
/// A symbol.
Symbol(Symbol<'a>),
/// A string literal; held as a `Cow` so the text may be borrowed or owned.
String(Cow<'a, str>),
/// A tuple of items.
Tuple(Tuple<'a>),
/// A list of items.
List(List<'a>),
}
@ -46,7 +47,14 @@ pub enum Regex<'a> {
pub struct List<'a>(pub Vec<Spanned<Item<'a>>>, pub ListKind);
#[derive(Clone, Debug)]
pub struct ProductionGroup<'a>(pub Vec<Spanned<Symbol<'a>>>);
/// One element of a production group.
pub enum ProductionUnit<'a> {
/// Epsilon, carrying the spelling it was written with (the parser stores
/// `"~"` for a tilde token, or the identifier text that matched the epsilon pattern).
Epsilon(&'a str),
/// A plain identifier.
Ident(&'a str),
/// A string literal.
String(Cow<'a, str>),
}
/// An ordered sequence of spanned production units, as collected by the
/// parser for one side / one alternative of a production rule.
#[derive(Clone, Debug)]
pub struct ProductionGroup<'a>(pub Vec<Spanned<ProductionUnit<'a>>>);
#[derive(Clone, Debug)]
pub enum TopLevel<'a> {
@ -70,6 +78,7 @@ impl<'a> Spanned<Item<'a>> {
Item::Symbol(sym) => return Some(*sym),
Item::Tuple(_) => _ = ctx.emit_error("expected ident found tuple", self.1),
Item::List(_) => _ = ctx.emit_error("expected ident found list", self.1),
Item::String(_) => _ = ctx.emit_error("expected ident found string", self.1),
}
None
}
@ -82,6 +91,7 @@ impl<'a> Spanned<Item<'a>> {
}
Item::Tuple(_) => _ = ctx.emit_error("expected ident found tuple", self.1),
Item::List(_) => _ = ctx.emit_error("expected ident found list", self.1),
Item::String(_) => _ = ctx.emit_error("expected ident found string", self.1),
}
None
}
@ -95,6 +105,7 @@ impl<'a> Spanned<Item<'a>> {
_ = ctx.emit_error("expected set found epsilon", self.1)
}
Item::Tuple(_) => _ = ctx.emit_error("expected set found tuple", self.1),
Item::String(_) => _ = ctx.emit_error("expected set found string", self.1),
Item::List(list) => return Some(&list.0),
}
None
@ -109,6 +120,7 @@ impl<'a> Spanned<Item<'a>> {
_ = ctx.emit_error("expected list found epsilon", self.1)
}
Item::Tuple(_) => _ = ctx.emit_error("expected list found tuple", self.1),
Item::String(_) => _ = ctx.emit_error("expected list found string", self.1),
Item::List(list) => return Some(&list.0),
}
None
@ -138,6 +150,7 @@ impl<'a> Spanned<Item<'a>> {
}
Item::Tuple(tuple) => return Some(Spanned(tuple, self.1)),
Item::List(_) => _ = ctx.emit_error("expected tuple found list", self.1),
Item::String(_) => _ = ctx.emit_error("expected tuple found string", self.1),
}
None
}

View file

@ -1,5 +1,13 @@
use crate::loader::{Span, Spanned};
/// Flavour of a string literal token.
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default)]
pub enum StringKind {
    /// Ordinary string literal; this is the default kind.
    #[default]
    Regular,
    /// String tagged as a regex; `Token`'s alternate `Display` form renders
    /// it with an `r` prefix.
    Regex,
}
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
pub enum Token<'a> {
LPar,
@ -14,6 +22,7 @@ pub enum Token<'a> {
Tilde,
Eq,
Comma,
Dash,
Or,
Plus,
@ -26,6 +35,8 @@ pub enum Token<'a> {
Comment(&'a str),
Ident(&'a str),
String(&'a str, StringKind, bool),
LineEnd,
}
@ -43,13 +54,19 @@ impl<'a> std::fmt::Display for Token<'a> {
Token::Comma => write!(f, "','"),
Token::Or => write!(f, "'|'"),
Token::Plus => write!(f, "'+'"),
Token::Dash => write!(f, "'-'"),
Token::Star => write!(f, "'*'"),
Token::And => write!(f, "'&'"),
Token::LSmallArrow => write!(f, "'->'"),
Token::LBigArrow => write!(f, "'=>'"),
Token::Comment(_) => write!(f, "<comment>"),
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
Token::Ident(_) => write!(f, "ident"),
Token::String(string, kind, _) if f.alternate() => write!(f, "{}{string:?}", if *kind==StringKind::Regex {"r"} else {""}),
Token::String(_, _, _) => write!(f, "string"),
Token::LineEnd => write!(f, "eol"),
}
}
@ -65,6 +82,7 @@ pub struct Lexer<'a> {
/// Errors the lexer can report while producing tokens.
pub enum Error {
/// A character the lexer does not accept; carries the offending character.
InvalidChar(char),
/// NOTE(review): presumably a multi-line comment that was never closed — confirm against the lexer.
UnclosedMultiLine,
/// A `"` string literal hit end of input before its closing `"`.
UnclosedString,
}
impl<'a> Lexer<'a> {
@ -145,8 +163,22 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
self.consume();
Ok(Token::LSmallArrow)
}
_ => Err(Error::InvalidChar('-')),
_ => Ok(Token::Dash),
},
'"' => {
let mut escaped = false;
loop {
match self.consume() {
Some('"') => break Ok(Token::String(&self.input[start+1..self.position], StringKind::Regular, escaped)),
None => break Err(Error::UnclosedString),
Some('\\') => {
_ = self.consume();
escaped = true;
},
_ => {}
}
}
}
'/' => match self.consume() {
Some('/') => loop {

View file

@ -1,3 +1,5 @@
use std::borrow::Cow;
use crate::epsilon;
use crate::loader::log::LogSink;
use crate::loader::{Context, Span};
@ -139,19 +141,38 @@ impl<'a, 'b> Parser<'a, 'b> {
S(Tuple(items), start.join(end))
}
/// Converts an already-consumed token into a spanned string value.
///
/// A `T::String` token yields its raw text, borrowed unchanged; the string
/// kind and the escape flag carried by the token are not consulted here.
/// Any other token is reported through `self.ctx` and replaced by the
/// placeholder `"<INVALID>"`, keeping the offending token's span so that
/// parsing can continue.
fn parse_as_string(&mut self, tok: S<T<'a>>) -> S<Cow<'a, str>> {
    match tok {
        // Kind and escape flag are ignored for now, so they are not bound
        // (binding them only produced unused-variable warnings).
        S(T::String(raw, _, _), span) => S(raw.into(), span),
        S(other, span) => {
            self.ctx.emit_error(
                format!(
                    "unexpected {:#} expected {:}",
                    other,
                    T::String("", Default::default(), false)
                ),
                span,
            );
            S("<INVALID>".into(), span)
        }
    }
}
/// Consumes the next token and interprets it as a string via
/// `parse_as_string` (which reports an error and yields a placeholder when
/// the token is not a string).
fn parse_string(&mut self) -> S<Cow<'a, str>> {
    let next = self.next_token();
    self.parse_as_string(next)
}
fn parse_item(&mut self) -> S<Item<'a>> {
match self.peek_token().0 {
T::Ident(_) | T::Tilde => self.parse_symbol().map(Item::Symbol),
T::String(_, _, _) => self.parse_string().map(Item::String),
T::LPar => self.parse_tupple().map(Item::Tuple),
T::LBrace | T::LBracket => self.parse_list().map(Item::List),
_ => {
let S(got, span) = self.next_token();
self.ctx.emit_error(
format!(
"unexpected {:#} expected item ( {:} | {:} | {:} | {:} | {:} )",
"unexpected {:#} expected item ( {:} | {:} | {:} | {:} | {:} | {:} )",
got,
T::Tilde,
T::Ident(""),
T::String("", Default::default(), false),
T::LPar,
T::LBrace,
T::LBracket,
@ -225,11 +246,39 @@ impl<'a, 'b> Parser<'a, 'b> {
todo!()
}
fn parse_production_rule(&mut self, S(sym, start): S<Symbol<'a>>) -> Option<S<TopLevel<'a>>> {
/// Interprets an already-consumed token as a production unit.
///
/// * a tilde token, or an identifier matching the `epsilon!` pattern,
///   becomes `ProductionUnit::Epsilon` (keeping the spelling used);
/// * any other identifier becomes `ProductionUnit::Ident`;
/// * a string token is converted with `parse_as_string` and wrapped in
///   `ProductionUnit::String`;
/// * anything else is reported through `self.ctx` and replaced by the
///   placeholder `ProductionUnit::Ident("<INVALID>")` at the same span.
fn parse_as_production_unit(&mut self, tok: S<T<'a>>) -> S<ProductionUnit<'a>> {
    match tok {
        S(T::Tilde, span) => S(ProductionUnit::Epsilon("~"), span),
        S(T::Ident(text @ epsilon!(pat)), span) => S(ProductionUnit::Epsilon(text), span),
        S(T::Ident(name), span) => S(ProductionUnit::Ident(name), span),
        S(T::String(..), _) => {
            let string = self.parse_as_string(tok);
            string.map(ProductionUnit::String)
        }
        S(unexpected, span) => {
            let message = format!(
                "unexpected {:#} expected production unit ( {:} | {:} | {:} )",
                unexpected,
                T::Tilde,
                T::Ident(""),
                T::String("", Default::default(), false)
            );
            self.ctx.emit_error(message, span);
            S(ProductionUnit::Ident("<INVALID>"), span)
        }
    }
}
/// Consumes the next token and interprets it as a production unit via
/// `parse_as_production_unit`.
fn parse_production_unit(&mut self) -> S<ProductionUnit<'a>> {
    let next = self.next_token();
    self.parse_as_production_unit(next)
}
fn parse_production_rule(&mut self, S(sym, start): S<ProductionUnit<'a>>) -> Option<S<TopLevel<'a>>> {
let mut lhs_group = ProductionGroup(vec![S(sym, start)]);
let mut lhs_group_end = start;
while !matches!(self.peek_token().0, T::LSmallArrow | T::LineEnd) {
let sym = self.parse_symbol();
let sym = self.parse_production_unit();
lhs_group_end = sym.1;
lhs_group.0.push(sym);
}
@ -248,7 +297,7 @@ impl<'a, 'b> Parser<'a, 'b> {
loop {
let mut group = ProductionGroup(vec![]);
while !matches!(self.peek_token().0, T::LineEnd | T::Or) {
group.0.push(self.parse_symbol());
group.0.push(self.parse_production_unit());
}
if group.0.is_empty() {
@ -327,10 +376,10 @@ impl<'a, 'b> Parser<'a, 'b> {
}
// production rule
(
sym @ S(T::Ident(_) | T::Tilde, _),
sym @ S(T::Ident(_) | T::Tilde | T::String(_, _, _), _),
S(T::LSmallArrow | T::Ident(_) | T::Tilde, _),
) => {
let sym = self.parse_as_symbol(sym);
let sym = self.parse_as_production_unit(sym);
if let Some(pr) = self.parse_production_rule(sym) {
break Some(pr);
}