From c35d7a91927806585e11649e5373bb1763b778be Mon Sep 17 00:00:00 2001 From: ParkerTenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Mon, 5 Jan 2026 20:20:08 -0500 Subject: [PATCH] fixed parser --- example.npda | 2 +- src/automata/npda.rs | 4 +- src/loader/parser.rs | 166 +++++++++++++++++++++++++++---------------- src/main.rs | 2 +- 4 files changed, 110 insertions(+), 64 deletions(-) diff --git a/example.npda b/example.npda index 2649bd5..60a9f6b 100644 --- a/example.npda +++ b/example.npda @@ -1,4 +1,4 @@ -type=NPDA +//type=NPDA Q = {q0, q1} // states E = {a, b} // alphabet T = {z0, A, B} // stack diff --git a/src/automata/npda.rs b/src/automata/npda.rs index dad5b07..eeaa071 100644 --- a/src/automata/npda.rs +++ b/src/automata/npda.rs @@ -284,7 +284,7 @@ impl TransitionTable { _ => logs.emit_error("expected ident", *src_d), }, TL::Item(S(name, dest_s), _) => { - logs.emit_error(format!("unknown item {name:?}, expected 'Q'|'E'|'{SIGMA_UPPER}'|'sigma'|'F'|'T'|'{GAMMA_UPPER}'|'gamma'|'I'|'q0'|'S'|'z0'"), *dest_s); + logs.emit_error(format!("unknown item {name:?}, expected 'Q' | 'E' | '{SIGMA_UPPER}' | 'sigma' | 'F' | 'T' | '{GAMMA_UPPER}' | 'gamma' | 'I' | 'q0' | 'S' | 'z0'"), *dest_s); } TL::TransitionFunc( @@ -376,7 +376,7 @@ impl TransitionTable { } TL::TransitionFunc(S((S(name, _), _), dest_s), _) => { logs.emit_error( - format!("unknown function {name:?}, expected 'd'|'delta'|'{DELTA_LOWER}'"), + format!("unknown function {name:?}, expected 'd' | 'delta' | '{DELTA_LOWER}'"), *dest_s, ); } diff --git a/src/loader/parser.rs b/src/loader/parser.rs index becef77..d8d3607 100644 --- a/src/loader/parser.rs +++ b/src/loader/parser.rs @@ -6,7 +6,7 @@ use super::lexer::{Lexer, Token}; pub struct Parser<'a> { lexer: Lexer<'a>, - peek: Spanned>>, + peek: Option>>, logs: Logs<'a>, } @@ -14,7 +14,7 @@ impl<'a> Parser<'a> { pub fn new(lexer: Lexer<'a>) -> Self { Parser { logs: Logs::new(lexer.input()), - peek: Spanned(None, Span(0,0)), + peek: None, lexer, } } @@ -24,33 +24,39 @@ impl<'a> Parser<'a> { } fn advance_line(&mut self) { + if self.peek_token().is_none(){ + return; + } + if self.expect_token(Token::LineEnd).0 { - self.peek = Spanned(None, Span(0,0)); + self.peek = None; } } - fn next_token(&mut self) -> Spanned>> { - match self.peek.0 { - Some(Token::LineEnd) => return self.peek, - Some(_) => return Spanned(self.peek.0.take(), self.peek.1), + fn next_token(&mut self) -> Option>> { + match self.peek { + Some(Spanned(Token::LineEnd, _)) => return self.peek, + Some(_) => return self.peek.take(), _ => {} } loop { match self.lexer.next() { Some(Spanned(Ok(Token::Comment(_)), _)) => {} Some(Spanned(Ok(Token::LineEnd), span)) => { - self.peek = Spanned(Some(Token::LineEnd), span); + self.peek = Some(Spanned(Token::LineEnd, span)); return self.peek; } - Some(Spanned(Ok(ok), r)) => return Spanned(Some(ok), r), - Some(Spanned(Err(err), span)) => self.logs.emit_error(format!("lexer: {err:?}"), span), - None => return Spanned(None, self.lexer.eof_span()) + Some(Spanned(Ok(ok), r)) => return Some(Spanned(ok, r)), + Some(Spanned(Err(err), span)) => { + self.logs.emit_error(format!("lexer: {err:?}"), span) + } + None => return None, } } } - fn peek_token(&mut self) -> Spanned>> { - if self.peek.0.is_none() { + fn peek_token(&mut self) -> Option>> { + if self.peek.is_none() { self.peek = self.next_token(); } self.peek @@ -79,14 +85,14 @@ impl<'a> Parser<'a> { fn parse_symbol(&mut self) -> Spanned> { match self.next_token() { - Spanned(Some(Token::Tilde), r) => Spanned(Symbol::Epsilon, r), - Spanned(Some(Token::Ident("epsilon")), r) => Spanned(Symbol::Epsilon, r), - Spanned(Some(Token::Ident(super::EPSILON_LOWER)), r) => Spanned(Symbol::Epsilon, r), - Spanned(Some(Token::Ident(ident)), r) => Spanned(Symbol::Ident(ident), r), - Spanned(Some(got), span) => { + Some(Spanned(Token::Tilde, r)) => Spanned(Symbol::Epsilon, r), + Some(Spanned(Token::Ident("epsilon"), r)) => Spanned(Symbol::Epsilon, r), + Some(Spanned(Token::Ident(super::EPSILON_LOWER), r)) => Spanned(Symbol::Epsilon, r), + Some(Spanned(Token::Ident(ident), r)) => Spanned(Symbol::Ident(ident), r), + Some(Spanned(got, span)) => { self.logs.emit_error( format!( - "unexpected token {:#}, expected {:}|{:} (symbol)", + "unexpected {:#}, expected {:} | {:} (symbol)", got, Token::Tilde, Token::Ident("") @@ -95,14 +101,14 @@ impl<'a> Parser<'a> { ); Spanned(Symbol::Ident(""), span) } - Spanned(None, span) => { + None => { self.logs.emit_error( format!( - "unexpected eof expected {:}|{:} (symbol)", + "unexpected eof expected {:} | {:} (symbol)", Token::Tilde, Token::Ident("") ), - span, + self.eof(), ); Spanned(Symbol::Ident(""), self.eof()) } @@ -154,7 +160,7 @@ impl<'a> Parser<'a> { self.next_token(); self.logs.emit_error( format!( - "unexpected token {:#}, expected {:}|{:}|{:}|{:}|{:} (item)", + "unexpected {:#}, expected {:} | {:} | {:} | {:} | {:} (item)", got, Token::Tilde, Token::Ident(""), @@ -169,7 +175,7 @@ impl<'a> Parser<'a> { None => { self.logs.emit_error( format!( - "unexpected eof expected {:}|{:}|{:}|{:}|{:} (item)", + "unexpected eof expected {:} | {:} | {:} | {:} | {:} (item)", Token::Tilde, Token::Ident(""), Token::LPar, @@ -192,7 +198,7 @@ impl<'a> Parser<'a> { Some(Spanned(got, span)) => { self.logs.emit_error( format!( - "unexpected token {:#}, expected {:}|{:}", + "unexpected {:#}, expected {:} | {:}", got, Token::RBrace, Token::RBracket @@ -204,7 +210,7 @@ impl<'a> Parser<'a> { None => { self.logs.emit_error( format!( - "unexpected eof expected {:}|{:}", + "unexpected eof expected {:} | {:}", Token::RBrace, Token::RBracket ), @@ -267,42 +273,62 @@ impl<'a> Parser<'a> { lhs_group_end = sym.1; lhs_group.0.push(sym); } - if !self.expect_token(Token::LSmallArrow).0{ - return Some(Spanned(TopLevel::ProductionRule(Spanned(lhs_group, start.join(lhs_group_end)), Spanned(vec![], lhs_group_end)), start.join(lhs_group_end))) + if !self.expect_token(Token::LSmallArrow).0 { + return Some(Spanned( + TopLevel::ProductionRule( + Spanned(lhs_group, start.join(lhs_group_end)), + Spanned(vec![], lhs_group_end), + ), + start.join(lhs_group_end), + )); } let mut groups = Vec::new(); - - while !matches!(self.peek_token(), None | Some(Spanned(Token::LineEnd, _))){ + + loop { let mut group = ProductionGroup(vec![]); - while !matches!(self.peek_token(), None | Some(Spanned(Token::LineEnd|Token::Or, _))){ + while !matches!( + self.peek_token(), + None | Some(Spanned(Token::LineEnd | Token::Or, _)) + ) { group.0.push(self.parse_symbol()); - } - if group.0.is_empty(){ - let span = if let Some(Spanned(_, span)) = self.peek_token(){ - span - }else{ - self.eof() - }; - self.logs.emit_error("cannot have empty production rule", span); } - if matches!(self.peek_token(), Some(Spanned(Token::Or, _))){ + + if group.0.is_empty() { + let eof = self.eof(); + let span = self.peek_token().map(|t| t.1).unwrap_or(eof); + self.logs + .emit_error("cannot have empty production group", span); + } + + let group_start = group.0.first().map(|g| g.1).unwrap_or(start); + let group_end = group.0.last().map(|g| g.1).unwrap_or(start); + groups.push(Spanned(group, group_start.join(group_end))); + + if matches!(self.peek_token(), Some(Spanned(Token::Or, _))) { self.next_token(); - // if matches!(self.peek_token(), None|Spanned(Token::Or|Token::LineEnd)) - } - let group_start = group.0.first().map(|g|g.1).unwrap_or(start); - let group_end = group.0.last().map(|g|g.1).unwrap_or(start); - groups.push(Spanned(group, group_start.join(group_end))) + } else { + break; + } } - if groups.is_empty(){ - self.logs.emit_error("cannot have empty production rule", start.join(lhs_group_end)); + if groups.is_empty() { + self.logs.emit_error( + "cannot have empty production rule", + start.join(lhs_group_end), + ); } - let rules_start = groups.first().map(|f|f.1).unwrap_or(start); - let rules_end = groups.last().map(|f|f.1).unwrap_or(start); + let rules_start = groups.first().map(|f| f.1).unwrap_or(start); + let rules_end = groups.last().map(|f| f.1).unwrap_or(start); - Some(Spanned(TopLevel::ProductionRule(Spanned(lhs_group, start.join(lhs_group_end)), Spanned(groups, rules_start.join(rules_end))), start.join(rules_end))) + Some(Spanned( + TopLevel::ProductionRule( + Spanned(lhs_group, start.join(lhs_group_end)), + Spanned(groups, rules_start.join(rules_end)), + ), + start.join(rules_end), + )) } fn parse_transition_function( @@ -350,7 +376,7 @@ impl<'a> Parser<'a> { break Some(pr); } } - (Spanned(Token::Ident(ident), start), _) => { + (Spanned(Token::Ident(ident), start), Some(Spanned(Token::Eq, _))) => { let name = Spanned(ident, start); if !self.expect_token(Token::Eq).0 { continue; @@ -359,19 +385,39 @@ impl<'a> Parser<'a> { let span = start.join(item.1); break Some(Spanned(TopLevel::Item(name, item), span)); } + (Spanned(Token::Ident(_), _), after) => { + match after { + Some(Spanned(tok, span)) => { + self.logs.emit_error( + format!( + "unexpected {:#}, expected {:} | {:}", + tok, + Token::Eq, + Token::LSmallArrow + ), + span, + ); + } + None => { + self.logs.emit_error( + format!( + "unexpected eof, expected {:} | {:}", + Token::Eq, + Token::LSmallArrow + ), + self.eof(), + ); + } + } + while !matches!(self.next_token(), None | Some(Spanned(Token::LineEnd, _))) {} + } _ => { self.logs.emit_error( - format!( - "unexpected token {:#}, expected {:}", - next.0, - Token::Ident("") - ), + format!("unexpected {:#}, expected {:}", next.0, Token::Ident("")), next.1, ); - while !matches!(self.next_token(), None|Some(Spanned(Token::LineEnd, _))){ - - } - }, + while !matches!(self.next_token(), None | Some(Spanned(Token::LineEnd, _))) {} + } } }; self.advance_line(); diff --git a/src/main.rs b/src/main.rs index c280171..17d2ae8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,7 +18,7 @@ fn main() { } }; - let input = "aababdsaab"; + let input = "aababaaba"; println!("running on: '{input}'"); let mut simulator = npda::Simulator::begin(input, table); loop {