fixed parser

This commit is contained in:
ParkerTenBroeck 2026-01-05 20:20:08 -05:00
parent 7971c61c74
commit c35d7a9192
4 changed files with 110 additions and 64 deletions

View file

@ -6,7 +6,7 @@ use super::lexer::{Lexer, Token};
pub struct Parser<'a> {
lexer: Lexer<'a>,
peek: Spanned<Option<Token<'a>>>,
peek: Option<Spanned<Token<'a>>>,
logs: Logs<'a>,
}
@ -14,7 +14,7 @@ impl<'a> Parser<'a> {
pub fn new(lexer: Lexer<'a>) -> Self {
Parser {
logs: Logs::new(lexer.input()),
peek: Spanned(None, Span(0,0)),
peek: None,
lexer,
}
}
@ -24,33 +24,39 @@ impl<'a> Parser<'a> {
}
fn advance_line(&mut self) {
if self.peek_token().is_none(){
return;
}
if self.expect_token(Token::LineEnd).0 {
self.peek = Spanned(None, Span(0,0));
self.peek = None;
}
}
fn next_token(&mut self) -> Spanned<Option<Token<'a>>> {
match self.peek.0 {
Some(Token::LineEnd) => return self.peek,
Some(_) => return Spanned(self.peek.0.take(), self.peek.1),
fn next_token(&mut self) -> Option<Spanned<Token<'a>>> {
match self.peek {
Some(Spanned(Token::LineEnd, _)) => return self.peek,
Some(_) => return self.peek.take(),
_ => {}
}
loop {
match self.lexer.next() {
Some(Spanned(Ok(Token::Comment(_)), _)) => {}
Some(Spanned(Ok(Token::LineEnd), span)) => {
self.peek = Spanned(Some(Token::LineEnd), span);
self.peek = Some(Spanned(Token::LineEnd, span));
return self.peek;
}
Some(Spanned(Ok(ok), r)) => return Spanned(Some(ok), r),
Some(Spanned(Err(err), span)) => self.logs.emit_error(format!("lexer: {err:?}"), span),
None => return Spanned(None, self.lexer.eof_span())
Some(Spanned(Ok(ok), r)) => return Some(Spanned(ok, r)),
Some(Spanned(Err(err), span)) => {
self.logs.emit_error(format!("lexer: {err:?}"), span)
}
None => return None,
}
}
}
fn peek_token(&mut self) -> Spanned<Option<Token<'a>>> {
if self.peek.0.is_none() {
fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> {
if self.peek.is_none() {
self.peek = self.next_token();
}
self.peek
@ -79,14 +85,14 @@ impl<'a> Parser<'a> {
fn parse_symbol(&mut self) -> Spanned<Symbol<'a>> {
match self.next_token() {
Spanned(Some(Token::Tilde), r) => Spanned(Symbol::Epsilon, r),
Spanned(Some(Token::Ident("epsilon")), r) => Spanned(Symbol::Epsilon, r),
Spanned(Some(Token::Ident(super::EPSILON_LOWER)), r) => Spanned(Symbol::Epsilon, r),
Spanned(Some(Token::Ident(ident)), r) => Spanned(Symbol::Ident(ident), r),
Spanned(Some(got), span) => {
Some(Spanned(Token::Tilde, r)) => Spanned(Symbol::Epsilon, r),
Some(Spanned(Token::Ident("epsilon"), r)) => Spanned(Symbol::Epsilon, r),
Some(Spanned(Token::Ident(super::EPSILON_LOWER), r)) => Spanned(Symbol::Epsilon, r),
Some(Spanned(Token::Ident(ident), r)) => Spanned(Symbol::Ident(ident), r),
Some(Spanned(got, span)) => {
self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}|{:} (symbol)",
"unexpected {:#}, expected {:} | {:} (symbol)",
got,
Token::Tilde,
Token::Ident("")
@ -95,14 +101,14 @@ impl<'a> Parser<'a> {
);
Spanned(Symbol::Ident("<INVALID>"), span)
}
Spanned(None, span) => {
None => {
self.logs.emit_error(
format!(
"unexpected eof expected {:}|{:} (symbol)",
"unexpected eof expected {:} | {:} (symbol)",
Token::Tilde,
Token::Ident("")
),
span,
self.eof(),
);
Spanned(Symbol::Ident("<INVALID>"), self.eof())
}
@ -154,7 +160,7 @@ impl<'a> Parser<'a> {
self.next_token();
self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}|{:}|{:}|{:}|{:} (item)",
"unexpected {:#}, expected {:} | {:} | {:} | {:} | {:} (item)",
got,
Token::Tilde,
Token::Ident(""),
@ -169,7 +175,7 @@ impl<'a> Parser<'a> {
None => {
self.logs.emit_error(
format!(
"unexpected eof expected {:}|{:}|{:}|{:}|{:} (item)",
"unexpected eof expected {:} | {:} | {:} | {:} | {:} (item)",
Token::Tilde,
Token::Ident(""),
Token::LPar,
@ -192,7 +198,7 @@ impl<'a> Parser<'a> {
Some(Spanned(got, span)) => {
self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}|{:}",
"unexpected {:#}, expected {:} | {:}",
got,
Token::RBrace,
Token::RBracket
@ -204,7 +210,7 @@ impl<'a> Parser<'a> {
None => {
self.logs.emit_error(
format!(
"unexpected eof expected {:}|{:}",
"unexpected eof expected {:} | {:}",
Token::RBrace,
Token::RBracket
),
@ -267,42 +273,62 @@ impl<'a> Parser<'a> {
lhs_group_end = sym.1;
lhs_group.0.push(sym);
}
if !self.expect_token(Token::LSmallArrow).0{
return Some(Spanned(TopLevel::ProductionRule(Spanned(lhs_group, start.join(lhs_group_end)), Spanned(vec![], lhs_group_end)), start.join(lhs_group_end)))
if !self.expect_token(Token::LSmallArrow).0 {
return Some(Spanned(
TopLevel::ProductionRule(
Spanned(lhs_group, start.join(lhs_group_end)),
Spanned(vec![], lhs_group_end),
),
start.join(lhs_group_end),
));
}
let mut groups = Vec::new();
while !matches!(self.peek_token(), None | Some(Spanned(Token::LineEnd, _))){
loop {
let mut group = ProductionGroup(vec![]);
while !matches!(self.peek_token(), None | Some(Spanned(Token::LineEnd|Token::Or, _))){
while !matches!(
self.peek_token(),
None | Some(Spanned(Token::LineEnd | Token::Or, _))
) {
group.0.push(self.parse_symbol());
}
if group.0.is_empty(){
let span = if let Some(Spanned(_, span)) = self.peek_token(){
span
}else{
self.eof()
};
self.logs.emit_error("cannot have empty production rule", span);
}
if matches!(self.peek_token(), Some(Spanned(Token::Or, _))){
if group.0.is_empty() {
let eof = self.eof();
let span = self.peek_token().map(|t| t.1).unwrap_or(eof);
self.logs
.emit_error("cannot have empty production group", span);
}
let group_start = group.0.first().map(|g| g.1).unwrap_or(start);
let group_end = group.0.last().map(|g| g.1).unwrap_or(start);
groups.push(Spanned(group, group_start.join(group_end)));
if matches!(self.peek_token(), Some(Spanned(Token::Or, _))) {
self.next_token();
// if matches!(self.peek_token(), None|Spanned(Token::Or|Token::LineEnd))
}
let group_start = group.0.first().map(|g|g.1).unwrap_or(start);
let group_end = group.0.last().map(|g|g.1).unwrap_or(start);
groups.push(Spanned(group, group_start.join(group_end)))
} else {
break;
}
}
if groups.is_empty(){
self.logs.emit_error("cannot have empty production rule", start.join(lhs_group_end));
if groups.is_empty() {
self.logs.emit_error(
"cannot have empty production rule",
start.join(lhs_group_end),
);
}
let rules_start = groups.first().map(|f|f.1).unwrap_or(start);
let rules_end = groups.last().map(|f|f.1).unwrap_or(start);
let rules_start = groups.first().map(|f| f.1).unwrap_or(start);
let rules_end = groups.last().map(|f| f.1).unwrap_or(start);
Some(Spanned(TopLevel::ProductionRule(Spanned(lhs_group, start.join(lhs_group_end)), Spanned(groups, rules_start.join(rules_end))), start.join(rules_end)))
Some(Spanned(
TopLevel::ProductionRule(
Spanned(lhs_group, start.join(lhs_group_end)),
Spanned(groups, rules_start.join(rules_end)),
),
start.join(rules_end),
))
}
fn parse_transition_function(
@ -350,7 +376,7 @@ impl<'a> Parser<'a> {
break Some(pr);
}
}
(Spanned(Token::Ident(ident), start), _) => {
(Spanned(Token::Ident(ident), start), Some(Spanned(Token::Eq, _))) => {
let name = Spanned(ident, start);
if !self.expect_token(Token::Eq).0 {
continue;
@ -359,19 +385,39 @@ impl<'a> Parser<'a> {
let span = start.join(item.1);
break Some(Spanned(TopLevel::Item(name, item), span));
}
(Spanned(Token::Ident(_), _), after) => {
match after {
Some(Spanned(tok, span)) => {
self.logs.emit_error(
format!(
"unexpected {:#}, expected {:} | {:}",
tok,
Token::Eq,
Token::LSmallArrow
),
span,
);
}
None => {
self.logs.emit_error(
format!(
"unexpected eof, expected {:} | {:}",
Token::Eq,
Token::LSmallArrow
),
self.eof(),
);
}
}
while !matches!(self.next_token(), None | Some(Spanned(Token::LineEnd, _))) {}
}
_ => {
self.logs.emit_error(
format!(
"unexpected token {:#}, expected {:}",
next.0,
Token::Ident("")
),
format!("unexpected {:#}, expected {:}", next.0, Token::Ident("")),
next.1,
);
while !matches!(self.next_token(), None|Some(Spanned(Token::LineEnd, _))){
}
},
while !matches!(self.next_token(), None | Some(Spanned(Token::LineEnd, _))) {}
}
}
};
self.advance_line();