mirror of
https://github.com/ParkerTenBroeck/automata.git
synced 2026-06-07 05:28:45 -04:00
moved AST into separate file
This commit is contained in:
parent
7970cb197a
commit
7e0d4ca69a
6 changed files with 116 additions and 83 deletions
|
|
@ -5,7 +5,6 @@
|
||||||
# Replace llvmPackages with llvmPackages_X, where X is the latest LLVM version (at the time of writing, 16)
|
# Replace llvmPackages with llvmPackages_X, where X is the latest LLVM version (at the time of writing, 16)
|
||||||
llvmPackages.bintools
|
llvmPackages.bintools
|
||||||
rustup
|
rustup
|
||||||
nasm
|
|
||||||
];
|
];
|
||||||
RUSTC_VERSION = "nightly";
|
RUSTC_VERSION = "nightly";
|
||||||
# https://github.com/rust-lang/rust-bindgen#environment-variables
|
# https://github.com/rust-lang/rust-bindgen#environment-variables
|
||||||
|
|
|
||||||
50
src/ast.rs
Normal file
50
src/ast.rs
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
use crate::lexer::Spanned;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Tuple<'a>(pub Vec<Spanned<Symbol<'a>>>);
|
||||||
|
|
||||||
|
/// A single symbol appearing in the source text.
#[derive(Debug, Clone)]
pub enum Symbol<'a> {
    /// The epsilon symbol.
    // NOTE(review): presumably produced from the `~` token (the parser routes
    // `Token::Tilde` to `parse_symbol`) — confirm against `parse_symbol`.
    Epsilon,
    /// A named symbol; the string slice borrows from the input with lifetime `'a`.
    Ident(&'a str),
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum Dest<'a> {
|
||||||
|
Ident(&'a str),
|
||||||
|
Function(Spanned<&'a str>, Spanned<Tuple<'a>>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum Item<'a> {
|
||||||
|
Symbol(Symbol<'a>),
|
||||||
|
Tuple(Tuple<'a>),
|
||||||
|
List(List<'a>),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Syntax tree for a regular expression.
///
/// Recursive variants own their operands through `Box` (single operand) or
/// `Vec` (any number of operands).
#[derive(Debug, Clone)]
pub enum Regex<'a> {
    /// A literal piece of text borrowed from the input.
    Terminal(&'a str),
    /// A set of character ranges.
    // NOTE(review): `complement` presumably negates the set, and the ranges
    // are half-open (`Range<char>` excludes its end) — confirm once
    // `parse_regex` is implemented.
    Match {
        complement: bool,
        patterns: Vec<Range<char>>,
    },
    /// Operands in sequence.
    Concat(Vec<Regex<'a>>),
    /// Star applied to the operand.
    Star(Box<Regex<'a>>),
    /// Plus applied to the operand.
    Plus(Box<Regex<'a>>),
    /// Union of the operands.
    Union(Vec<Regex<'a>>),
    /// Intersection of the operands.
    Intersection(Vec<Regex<'a>>),
    /// Complement of the operand.
    Complement(Box<Regex<'a>>),
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct List<'a>(pub Vec<Spanned<Item<'a>>>);
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum TopLevel<'a> {
|
||||||
|
Assignment(Spanned<Dest<'a>>, Spanned<Item<'a>>),
|
||||||
|
ProductionRule(Spanned<Symbol<'a>>, Spanned<Symbol<'a>>),
|
||||||
|
Table(),
|
||||||
|
}
|
||||||
32
src/lexer.rs
32
src/lexer.rs
|
|
@ -29,21 +29,21 @@ pub enum Token<'a> {
|
||||||
impl<'a> std::fmt::Display for Token<'a> {
|
impl<'a> std::fmt::Display for Token<'a> {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Token::LPar => write!(f, ")"),
|
Token::LPar => write!(f, "')'"),
|
||||||
Token::RPar => write!(f, "("),
|
Token::RPar => write!(f, "'('"),
|
||||||
Token::LBrace => write!(f, "{{"),
|
Token::LBrace => write!(f, "'{{'"),
|
||||||
Token::RBrace => write!(f, "}}"),
|
Token::RBrace => write!(f, "'}}'"),
|
||||||
Token::LBracket => write!(f, "["),
|
Token::LBracket => write!(f, "'['"),
|
||||||
Token::RBracket => write!(f, "]"),
|
Token::RBracket => write!(f, "']'"),
|
||||||
Token::Tilde => write!(f, "~"),
|
Token::Tilde => write!(f, "'~'"),
|
||||||
Token::Eq => write!(f, "="),
|
Token::Eq => write!(f, "'='"),
|
||||||
Token::Comma => write!(f, ","),
|
Token::Comma => write!(f, "','"),
|
||||||
Token::Or => write!(f, "|"),
|
Token::Or => write!(f, "'|'"),
|
||||||
Token::Plus => write!(f, "+"),
|
Token::Plus => write!(f, "'+'"),
|
||||||
Token::Star => write!(f, "*"),
|
Token::Star => write!(f, "'*'"),
|
||||||
Token::And => write!(f, "&"),
|
Token::And => write!(f, "'&'"),
|
||||||
Token::LSmallArrow => write!(f, "->"),
|
Token::LSmallArrow => write!(f, "'->'"),
|
||||||
Token::LBigArrow => write!(f, "=>"),
|
Token::LBigArrow => write!(f, "'=>'"),
|
||||||
Token::Comment(_) => write!(f, "<comment>"),
|
Token::Comment(_) => write!(f, "<comment>"),
|
||||||
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
|
Token::Ident(ident) if f.alternate() => write!(f, "{ident:?}"),
|
||||||
Token::Ident(_) => write!(f, "ident"),
|
Token::Ident(_) => write!(f, "ident"),
|
||||||
|
|
@ -126,7 +126,7 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
|
||||||
|
|
||||||
let res = match self.consume()? {
|
let res = match self.consume()? {
|
||||||
'(' => Ok(Token::LPar),
|
'(' => Ok(Token::LPar),
|
||||||
')' => Ok(Token::LPar),
|
')' => Ok(Token::RPar),
|
||||||
'{' => Ok(Token::LBrace),
|
'{' => Ok(Token::LBrace),
|
||||||
'}' => Ok(Token::RBrace),
|
'}' => Ok(Token::RBrace),
|
||||||
'[' => Ok(Token::LBracket),
|
'[' => Ok(Token::LBracket),
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
pub mod dfa;
|
pub mod dfa;
|
||||||
pub mod lexer;
|
pub mod lexer;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
|
pub mod ast;
|
||||||
|
|
||||||
pub struct SymbolMap<T>([T; 256]);
|
pub struct SymbolMap<T>([T; 256]);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,37 +1,8 @@
|
||||||
use std::iter::Peekable;
|
use std::{iter::Peekable};
|
||||||
|
use crate::ast::*;
|
||||||
use crate::lexer::{Lexer, Span, Spanned, Token};
|
use crate::lexer::{Lexer, Span, Spanned, Token};
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub struct Tuple<'a>(pub Vec<Spanned<Symbol<'a>>>);
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub enum Symbol<'a> {
|
|
||||||
Epsilon,
|
|
||||||
Ident(&'a str),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub enum Dest<'a> {
|
|
||||||
Ident(&'a str),
|
|
||||||
Function(Spanned<&'a str>, Spanned<Tuple<'a>>),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub enum Item<'a> {
|
|
||||||
Symbol(Symbol<'a>),
|
|
||||||
Tuple(Tuple<'a>),
|
|
||||||
List(List<'a>)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub struct List<'a>(pub Vec<Spanned<Item<'a>>>);
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub enum TopLevel<'a> {
|
|
||||||
Assignment(Spanned<Dest<'a>>, Spanned<Item<'a>>),
|
|
||||||
Table(),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum LogKind {
|
pub enum LogKind {
|
||||||
Lexer,
|
Lexer,
|
||||||
|
|
@ -59,7 +30,11 @@ pub struct Parser<'a> {
|
||||||
|
|
||||||
impl<'a> Parser<'a> {
|
impl<'a> Parser<'a> {
|
||||||
pub fn new(lexer: Lexer<'a>) -> Self {
|
pub fn new(lexer: Lexer<'a>) -> Self {
|
||||||
Parser { eof: lexer.eof_span(), lexer: lexer.peekable(), log: Vec::new() }
|
Parser {
|
||||||
|
eof: lexer.eof_span(),
|
||||||
|
lexer: lexer.peekable(),
|
||||||
|
log: Vec::new(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn next_token(&mut self) -> Option<Spanned<Token<'a>>> {
|
fn next_token(&mut self) -> Option<Spanned<Token<'a>>> {
|
||||||
|
|
@ -80,7 +55,6 @@ impl<'a> Parser<'a> {
|
||||||
fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> {
|
fn peek_token(&mut self) -> Option<Spanned<Token<'a>>> {
|
||||||
loop {
|
loop {
|
||||||
match *self.lexer.peek()? {
|
match *self.lexer.peek()? {
|
||||||
// not a heavy clone but because of range
|
|
||||||
Spanned(Ok(ok), r) => return Some(Spanned(ok, r)),
|
Spanned(Ok(ok), r) => return Some(Spanned(ok, r)),
|
||||||
Spanned(Err(err), r) => self.log.push(Log {
|
Spanned(Err(err), r) => self.log.push(Log {
|
||||||
message: format!("{err:?}"),
|
message: format!("{err:?}"),
|
||||||
|
|
@ -156,7 +130,7 @@ impl<'a> Parser<'a> {
|
||||||
let mut items = Vec::new();
|
let mut items = Vec::new();
|
||||||
let (matched, start) = self.expect_token(Token::LPar);
|
let (matched, start) = self.expect_token(Token::LPar);
|
||||||
if !matched {
|
if !matched {
|
||||||
return Spanned(Tuple(Vec::new()), start)
|
return Spanned(Tuple(Vec::new()), start);
|
||||||
}
|
}
|
||||||
|
|
||||||
while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) {
|
while !matches!(self.peek_token(), Some(Spanned(Token::RPar, _))) {
|
||||||
|
|
@ -166,10 +140,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
if self.peek_token().is_none() {
|
if self.peek_token().is_none() {
|
||||||
self.log.push(Log {
|
self.log.push(Log {
|
||||||
message: format!(
|
message: format!("unexpected eof expected {:}", Token::RPar),
|
||||||
"unexpected eof expected {:}",
|
|
||||||
Token::RPar
|
|
||||||
),
|
|
||||||
range: self.eof,
|
range: self.eof,
|
||||||
level: LogLevel::Error,
|
level: LogLevel::Error,
|
||||||
kind: LogKind::Lexer,
|
kind: LogKind::Lexer,
|
||||||
|
|
@ -185,7 +156,9 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
pub fn parse_item(&mut self) -> Spanned<Item<'a>> {
|
pub fn parse_item(&mut self) -> Spanned<Item<'a>> {
|
||||||
match self.peek_token() {
|
match self.peek_token() {
|
||||||
Some(Spanned(Token::Ident(_)|Token::Tilde, _)) => self.parse_symbol().map(Item::Symbol),
|
Some(Spanned(Token::Ident(_) | Token::Tilde, _)) => {
|
||||||
|
self.parse_symbol().map(Item::Symbol)
|
||||||
|
}
|
||||||
Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple),
|
Some(Spanned(Token::LPar, _)) => self.parse_tupple().map(Item::Tuple),
|
||||||
Some(Spanned(Token::LBrace, _)) => self.parse_list().map(Item::List),
|
Some(Spanned(Token::LBrace, _)) => self.parse_list().map(Item::List),
|
||||||
Some(Spanned(got, r)) => {
|
Some(Spanned(got, r)) => {
|
||||||
|
|
@ -226,7 +199,7 @@ impl<'a> Parser<'a> {
|
||||||
let mut list = Vec::new();
|
let mut list = Vec::new();
|
||||||
let (matched, start) = self.expect_token(Token::LBrace);
|
let (matched, start) = self.expect_token(Token::LBrace);
|
||||||
if !matched {
|
if !matched {
|
||||||
return Spanned(List(Vec::new()), start)
|
return Spanned(List(Vec::new()), start);
|
||||||
}
|
}
|
||||||
|
|
||||||
while !matches!(self.peek_token(), Some(Spanned(Token::RBrace, _))) {
|
while !matches!(self.peek_token(), Some(Spanned(Token::RBrace, _))) {
|
||||||
|
|
@ -236,10 +209,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
if self.peek_token().is_none() {
|
if self.peek_token().is_none() {
|
||||||
self.log.push(Log {
|
self.log.push(Log {
|
||||||
message: format!(
|
message: format!("unexpected eof expected {:}", Token::RBrace),
|
||||||
"unexpected eof expected {:}",
|
|
||||||
Token::RBrace
|
|
||||||
),
|
|
||||||
range: self.eof,
|
range: self.eof,
|
||||||
level: LogLevel::Error,
|
level: LogLevel::Error,
|
||||||
kind: LogKind::Lexer,
|
kind: LogKind::Lexer,
|
||||||
|
|
@ -251,28 +221,41 @@ impl<'a> Parser<'a> {
|
||||||
Spanned(List(list), start.join(end))
|
Spanned(List(list), start.join(end))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a regular expression into a spanned [`Regex`] node.
///
/// # Panics
///
/// Always panics for now: the body is a `todo!()` placeholder.
pub fn parse_regex(&mut self) -> Spanned<Regex<'a>> {
    todo!()
}
|
||||||
|
|
||||||
pub fn parse_elements(&mut self) -> Vec<Spanned<TopLevel<'a>>> {
|
pub fn parse_elements(&mut self) -> Vec<Spanned<TopLevel<'a>>> {
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let Some(next) = self.next_token() else { break };
|
let Some(next) = self.next_token() else { break };
|
||||||
match next {
|
match (next, self.peek_token()) {
|
||||||
Spanned(Token::Ident(ident), ident_range) => {
|
(Spanned(Token::Ident(ident), start), Some(Spanned(Token::LPar, _))) => {
|
||||||
let dest @ Spanned(_, start) = if matches!(self.peek_token(), Some(Spanned(Token::LPar, _))) {
|
|
||||||
let tuple = self.parse_tupple();
|
let tuple = self.parse_tupple();
|
||||||
let span = ident_range.join(tuple.1);
|
let span = start.join(tuple.1);
|
||||||
Spanned(Dest::Function(Spanned(ident, ident_range), tuple), span)
|
let dest = Spanned(Dest::Function(Spanned(ident, start), tuple), span);
|
||||||
} else {
|
self.expect_token(Token::Eq);
|
||||||
Spanned(Dest::Ident(ident), ident_range)
|
let item = self.parse_item();
|
||||||
};
|
let span = start.join(item.1);
|
||||||
|
result.push(Spanned(TopLevel::Assignment(dest, item), span));
|
||||||
|
}
|
||||||
|
(Spanned(Token::Ident(_), _), Some(Spanned(Token::LSmallArrow|Token::Ident(_), _))) => {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
(Spanned(Token::Ident(ident), start), _) => {
|
||||||
|
let dest = Spanned(Dest::Ident(ident), start);
|
||||||
self.expect_token(Token::Eq);
|
self.expect_token(Token::Eq);
|
||||||
|
|
||||||
let item = self.parse_item();
|
let item = self.parse_item();
|
||||||
let span = start.join(item.1);
|
let span = start.join(item.1);
|
||||||
result.push(Spanned(TopLevel::Assignment(dest, item), span));
|
result.push(Spanned(TopLevel::Assignment(dest, item), span));
|
||||||
}
|
}
|
||||||
_ => self.log.push(Log {
|
_ => self.log.push(Log {
|
||||||
message: format!("unexpected token {:#}, expected {:}", next.0, Token::Ident("")),
|
message: format!(
|
||||||
|
"unexpected token {:#}, expected {:}",
|
||||||
|
next.0,
|
||||||
|
Token::Ident("")
|
||||||
|
),
|
||||||
range: next.1,
|
range: next.1,
|
||||||
level: LogLevel::Error,
|
level: LogLevel::Error,
|
||||||
kind: LogKind::Lexer,
|
kind: LogKind::Lexer,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue