fixed parser

This commit is contained in:
ParkerTenBroeck 2026-01-05 20:20:08 -05:00
parent 7971c61c74
commit c35d7a9192
4 changed files with 110 additions and 64 deletions

View file

@ -1,4 +1,4 @@
type=NPDA //type=NPDA
Q = {q0, q1} // states Q = {q0, q1} // states
E = {a, b} // alphabet E = {a, b} // alphabet
T = {z0, A, B} // stack T = {z0, A, B} // stack

View file

@ -284,7 +284,7 @@ impl TransitionTable {
_ => logs.emit_error("expected ident", *src_d), _ => logs.emit_error("expected ident", *src_d),
}, },
TL::Item(S(name, dest_s), _) => { TL::Item(S(name, dest_s), _) => {
logs.emit_error(format!("unknown item {name:?}, expected 'Q'|'E'|'{SIGMA_UPPER}'|'sigma'|'F'|'T'|'{GAMMA_UPPER}'|'gamma'|'I'|'q0'|'S'|'z0'"), *dest_s); logs.emit_error(format!("unknown item {name:?}, expected 'Q' | 'E' | '{SIGMA_UPPER}' | 'sigma' | 'F' | 'T' | '{GAMMA_UPPER}' | 'gamma' | 'I' | 'q0' | 'S' | 'z0'"), *dest_s);
} }
TL::TransitionFunc( TL::TransitionFunc(
@ -376,7 +376,7 @@ impl TransitionTable {
} }
TL::TransitionFunc(S((S(name, _), _), dest_s), _) => { TL::TransitionFunc(S((S(name, _), _), dest_s), _) => {
logs.emit_error( logs.emit_error(
format!("unknown function {name:?}, expected 'd'|'delta'|'{DELTA_LOWER}'"), format!("unknown function {name:?}, expected 'd' | 'delta' | '{DELTA_LOWER}'"),
*dest_s, *dest_s,
); );
} }

View file

@ -6,7 +6,7 @@ use super::lexer::{Lexer, Token};
pub struct Parser<'a> { pub struct Parser<'a> {
lexer: Lexer<'a>, lexer: Lexer<'a>,
peek: Spanned<Option<Token<'a>>>, peek: Option<Spanned<Token<'a>>>,
logs: Logs<'a>, logs: Logs<'a>,
} }
@ -14,7 +14,7 @@ impl<'a> Parser<'a> {
pub fn new(lexer: Lexer<'a>) -> Self { pub fn new(lexer: Lexer<'a>) -> Self {
Parser { Parser {
logs: Logs::new(lexer.input()), logs: Logs::new(lexer.input()),
peek: Spanned(None, Span(0,0)), peek: None,
lexer, lexer,
} }
} }
@ -24,33 +24,39 @@ impl<'a> Parser<'a> {
} }
fn advance_line(&mut self) { fn advance_line(&mut self) {
if self.peek_token().is_none(){
return;
}
if self.expect_token(Token::LineEnd).0 { if self.expect_token(Token::LineEnd).0 {
self.peek = Spanned(None, Span(0,0)); self.peek = None;
} }
} }
fn next_token(&mut self) -> Spanned<Option<Token<'a>>> { fn next_token(&mut self) -> Option<Spanned<Token<'a>>> {
match self.peek.0 { match self.peek {
Some(Token::LineEnd) => return self.peek, Some(Spanned(Token::LineEnd, _)) => return self.peek,
Some(_) => return Spanned(self.peek.0.take(), self.peek.1), Some(_) => return self.peek.take(),
_ => {} _ => {}
} }
loop { loop {
match self.lexer.next() { match self.lexer.next() {
Some(Spanned(Ok(Token::Comment(_)), _)) => {} Some(Spanned(Ok(Token::Comment(_)), _)) => {}
Some(Spanned(Ok(Token::LineEnd), span)) => { Some(Spanned(Ok(Token::LineEnd), span)) => {
self.peek = Spanned(Some(Token::LineEnd), span); self.peek = Some(Spanned(Token::LineEnd, span));
return self.peek; return self.peek;
} }
Some(Spanned(Ok(ok), r)) => return Spanned(Some(ok), r), Some(Spanned(Ok(ok), r)) => return Some(Spanned(ok, r)),
Some(Spanned(Err(err), span)) => self.logs.emit_error(format!("lexer: {err:?}"), span), Some(Spanned(Err(err), span)) => {
None => return Spanned(None, self.lexer.eof_span()) self.logs.emit_error(format!("lexer: {err:?}"), span)
}
None => return None,
} }
} }
} }
fn peek_token(&mut self) -> Spanned<Option<Token<'a>>> { fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> {
if self.peek.0.is_none() { if self.peek.is_none() {
self.peek = self.next_token(); self.peek = self.next_token();
} }
self.peek self.peek
@ -79,14 +85,14 @@ impl<'a> Parser<'a> {
fn parse_symbol(&mut self) -> Spanned<Symbol<'a>> { fn parse_symbol(&mut self) -> Spanned<Symbol<'a>> {
match self.next_token() { match self.next_token() {
Spanned(Some(Token::Tilde), r) => Spanned(Symbol::Epsilon, r), Some(Spanned(Token::Tilde, r)) => Spanned(Symbol::Epsilon, r),
Spanned(Some(Token::Ident("epsilon")), r) => Spanned(Symbol::Epsilon, r), Some(Spanned(Token::Ident("epsilon"), r)) => Spanned(Symbol::Epsilon, r),
Spanned(Some(Token::Ident(super::EPSILON_LOWER)), r) => Spanned(Symbol::Epsilon, r), Some(Spanned(Token::Ident(super::EPSILON_LOWER), r)) => Spanned(Symbol::Epsilon, r),
Spanned(Some(Token::Ident(ident)), r) => Spanned(Symbol::Ident(ident), r), Some(Spanned(Token::Ident(ident), r)) => Spanned(Symbol::Ident(ident), r),
Spanned(Some(got), span) => { Some(Spanned(got, span)) => {
self.logs.emit_error( self.logs.emit_error(
format!( format!(
"unexpected token {:#}, expected {:}|{:} (symbol)", "unexpected {:#}, expected {:} | {:} (symbol)",
got, got,
Token::Tilde, Token::Tilde,
Token::Ident("") Token::Ident("")
@ -95,14 +101,14 @@ impl<'a> Parser<'a> {
); );
Spanned(Symbol::Ident("<INVALID>"), span) Spanned(Symbol::Ident("<INVALID>"), span)
} }
Spanned(None, span) => { None => {
self.logs.emit_error( self.logs.emit_error(
format!( format!(
"unexpected eof expected {:}|{:} (symbol)", "unexpected eof expected {:} | {:} (symbol)",
Token::Tilde, Token::Tilde,
Token::Ident("") Token::Ident("")
), ),
span, self.eof(),
); );
Spanned(Symbol::Ident("<INVALID>"), self.eof()) Spanned(Symbol::Ident("<INVALID>"), self.eof())
} }
@ -154,7 +160,7 @@ impl<'a> Parser<'a> {
self.next_token(); self.next_token();
self.logs.emit_error( self.logs.emit_error(
format!( format!(
"unexpected token {:#}, expected {:}|{:}|{:}|{:}|{:} (item)", "unexpected {:#}, expected {:} | {:} | {:} | {:} | {:} (item)",
got, got,
Token::Tilde, Token::Tilde,
Token::Ident(""), Token::Ident(""),
@ -169,7 +175,7 @@ impl<'a> Parser<'a> {
None => { None => {
self.logs.emit_error( self.logs.emit_error(
format!( format!(
"unexpected eof expected {:}|{:}|{:}|{:}|{:} (item)", "unexpected eof expected {:} | {:} | {:} | {:} | {:} (item)",
Token::Tilde, Token::Tilde,
Token::Ident(""), Token::Ident(""),
Token::LPar, Token::LPar,
@ -192,7 +198,7 @@ impl<'a> Parser<'a> {
Some(Spanned(got, span)) => { Some(Spanned(got, span)) => {
self.logs.emit_error( self.logs.emit_error(
format!( format!(
"unexpected token {:#}, expected {:}|{:}", "unexpected {:#}, expected {:} | {:}",
got, got,
Token::RBrace, Token::RBrace,
Token::RBracket Token::RBracket
@ -204,7 +210,7 @@ impl<'a> Parser<'a> {
None => { None => {
self.logs.emit_error( self.logs.emit_error(
format!( format!(
"unexpected eof expected {:}|{:}", "unexpected eof expected {:} | {:}",
Token::RBrace, Token::RBrace,
Token::RBracket Token::RBracket
), ),
@ -267,42 +273,62 @@ impl<'a> Parser<'a> {
lhs_group_end = sym.1; lhs_group_end = sym.1;
lhs_group.0.push(sym); lhs_group.0.push(sym);
} }
if !self.expect_token(Token::LSmallArrow).0{ if !self.expect_token(Token::LSmallArrow).0 {
return Some(Spanned(TopLevel::ProductionRule(Spanned(lhs_group, start.join(lhs_group_end)), Spanned(vec![], lhs_group_end)), start.join(lhs_group_end))) return Some(Spanned(
TopLevel::ProductionRule(
Spanned(lhs_group, start.join(lhs_group_end)),
Spanned(vec![], lhs_group_end),
),
start.join(lhs_group_end),
));
} }
let mut groups = Vec::new(); let mut groups = Vec::new();
while !matches!(self.peek_token(), None | Some(Spanned(Token::LineEnd, _))){ loop {
let mut group = ProductionGroup(vec![]); let mut group = ProductionGroup(vec![]);
while !matches!(self.peek_token(), None | Some(Spanned(Token::LineEnd|Token::Or, _))){ while !matches!(
self.peek_token(),
None | Some(Spanned(Token::LineEnd | Token::Or, _))
) {
group.0.push(self.parse_symbol()); group.0.push(self.parse_symbol());
} }
if group.0.is_empty(){
let span = if let Some(Spanned(_, span)) = self.peek_token(){ if group.0.is_empty() {
span let eof = self.eof();
}else{ let span = self.peek_token().map(|t| t.1).unwrap_or(eof);
self.eof() self.logs
}; .emit_error("cannot have empty production group", span);
self.logs.emit_error("cannot have empty production rule", span);
} }
if matches!(self.peek_token(), Some(Spanned(Token::Or, _))){
let group_start = group.0.first().map(|g| g.1).unwrap_or(start);
let group_end = group.0.last().map(|g| g.1).unwrap_or(start);
groups.push(Spanned(group, group_start.join(group_end)));
if matches!(self.peek_token(), Some(Spanned(Token::Or, _))) {
self.next_token(); self.next_token();
// if matches!(self.peek_token(), None|Spanned(Token::Or|Token::LineEnd)) } else {
break;
} }
let group_start = group.0.first().map(|g|g.1).unwrap_or(start);
let group_end = group.0.last().map(|g|g.1).unwrap_or(start);
groups.push(Spanned(group, group_start.join(group_end)))
} }
if groups.is_empty(){ if groups.is_empty() {
self.logs.emit_error("cannot have empty production rule", start.join(lhs_group_end)); self.logs.emit_error(
"cannot have empty production rule",
start.join(lhs_group_end),
);
} }
let rules_start = groups.first().map(|f|f.1).unwrap_or(start); let rules_start = groups.first().map(|f| f.1).unwrap_or(start);
let rules_end = groups.last().map(|f|f.1).unwrap_or(start); let rules_end = groups.last().map(|f| f.1).unwrap_or(start);
Some(Spanned(TopLevel::ProductionRule(Spanned(lhs_group, start.join(lhs_group_end)), Spanned(groups, rules_start.join(rules_end))), start.join(rules_end))) Some(Spanned(
TopLevel::ProductionRule(
Spanned(lhs_group, start.join(lhs_group_end)),
Spanned(groups, rules_start.join(rules_end)),
),
start.join(rules_end),
))
} }
fn parse_transition_function( fn parse_transition_function(
@ -350,7 +376,7 @@ impl<'a> Parser<'a> {
break Some(pr); break Some(pr);
} }
} }
(Spanned(Token::Ident(ident), start), _) => { (Spanned(Token::Ident(ident), start), Some(Spanned(Token::Eq, _))) => {
let name = Spanned(ident, start); let name = Spanned(ident, start);
if !self.expect_token(Token::Eq).0 { if !self.expect_token(Token::Eq).0 {
continue; continue;
@ -359,19 +385,39 @@ impl<'a> Parser<'a> {
let span = start.join(item.1); let span = start.join(item.1);
break Some(Spanned(TopLevel::Item(name, item), span)); break Some(Spanned(TopLevel::Item(name, item), span));
} }
_ => { (Spanned(Token::Ident(_), _), after) => {
match after {
Some(Spanned(tok, span)) => {
self.logs.emit_error( self.logs.emit_error(
format!( format!(
"unexpected token {:#}, expected {:}", "unexpected {:#}, expected {:} | {:}",
next.0, tok,
Token::Ident("") Token::Eq,
Token::LSmallArrow
), ),
span,
);
}
None => {
self.logs.emit_error(
format!(
"unexpected eof, expected {:} | {:}",
Token::Eq,
Token::LSmallArrow
),
self.eof(),
);
}
}
while !matches!(self.next_token(), None | Some(Spanned(Token::LineEnd, _))) {}
}
_ => {
self.logs.emit_error(
format!("unexpected {:#}, expected {:}", next.0, Token::Ident("")),
next.1, next.1,
); );
while !matches!(self.next_token(), None|Some(Spanned(Token::LineEnd, _))){ while !matches!(self.next_token(), None | Some(Spanned(Token::LineEnd, _))) {}
} }
},
} }
}; };
self.advance_line(); self.advance_line();

View file

@ -18,7 +18,7 @@ fn main() {
} }
}; };
let input = "aababdsaab"; let input = "aababaaba";
println!("running on: '{input}'"); println!("running on: '{input}'");
let mut simulator = npda::Simulator::begin(input, table); let mut simulator = npda::Simulator::begin(input, table);
loop { loop {