Add parser for syntax tree
This commit is contained in:
parent
830b3b10d9
commit
9d24571b40
|
@ -8,6 +8,7 @@ edition = "2021"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
colored = "2.1.0"
|
colored = "2.1.0"
|
||||||
derive_more = { version = "0.99.17", default-features = false, features = ["deref", "from", "deref_mut"] }
|
derive_more = { version = "0.99.17", default-features = false, features = ["deref", "from", "deref_mut"] }
|
||||||
|
enum-as-inner = "0.6.0"
|
||||||
getset = "0.1.2"
|
getset = "0.1.2"
|
||||||
strum = { version = "0.26.2", features = ["derive"] }
|
strum = { version = "0.26.2", features = ["derive"] }
|
||||||
strum_macros = "0.26.2"
|
strum_macros = "0.26.2"
|
||||||
|
|
|
@ -8,6 +8,8 @@ pub enum Error {
|
||||||
IoError(#[from] io::Error),
|
IoError(#[from] io::Error),
|
||||||
#[error("An error occured while tokenizing the source code.")]
|
#[error("An error occured while tokenizing the source code.")]
|
||||||
TokenizeError(#[from] crate::lexical::token::TokenizeError),
|
TokenizeError(#[from] crate::lexical::token::TokenizeError),
|
||||||
|
#[error("An error occurred while parsing the source code.")]
|
||||||
|
ParseError(#[from] crate::syntax::error::Error),
|
||||||
#[error("An error occurred")]
|
#[error("An error occurred")]
|
||||||
Other(&'static str),
|
Other(&'static str),
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,6 +71,7 @@ impl SourceFile {
|
||||||
SourceIterator {
|
SourceIterator {
|
||||||
source_file: self,
|
source_file: self,
|
||||||
iterator: self.content().char_indices().peekable(),
|
iterator: self.content().char_indices().peekable(),
|
||||||
|
prev: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -299,6 +300,9 @@ pub struct SourceIterator<'a> {
|
||||||
#[get_copy = "pub"]
|
#[get_copy = "pub"]
|
||||||
source_file: &'a Arc<SourceFile>,
|
source_file: &'a Arc<SourceFile>,
|
||||||
iterator: Peekable<CharIndices<'a>>,
|
iterator: Peekable<CharIndices<'a>>,
|
||||||
|
/// Get the previous character that was iterated over.
|
||||||
|
#[get_copy = "pub"]
|
||||||
|
prev: Option<(usize, char)>,
|
||||||
}
|
}
|
||||||
impl<'a> SourceIterator<'a> {
|
impl<'a> SourceIterator<'a> {
|
||||||
/// Peek at the next character in the source file.
|
/// Peek at the next character in the source file.
|
||||||
|
@ -310,7 +314,11 @@ impl<'a> Iterator for SourceIterator<'a> {
|
||||||
type Item = (usize, char);
|
type Item = (usize, char);
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.iterator.next()
|
let item = self.iterator.next();
|
||||||
|
if item.is_some() {
|
||||||
|
self.prev = item;
|
||||||
|
}
|
||||||
|
item
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -74,6 +74,7 @@ pub enum Token {
|
||||||
Punctuation(Punctuation),
|
Punctuation(Punctuation),
|
||||||
Numeric(Numeric),
|
Numeric(Numeric),
|
||||||
Comment(Comment),
|
Comment(Comment),
|
||||||
|
LiteralCommand(LiteralCommand),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
|
@ -87,6 +88,7 @@ impl Token {
|
||||||
Self::Punctuation(token) => &token.span,
|
Self::Punctuation(token) => &token.span,
|
||||||
Self::Numeric(token) => &token.span,
|
Self::Numeric(token) => &token.span,
|
||||||
Self::Comment(token) => &token.span,
|
Self::Comment(token) => &token.span,
|
||||||
|
Self::LiteralCommand(token) => &token.span,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -100,6 +102,7 @@ impl SourceElement for Token {
|
||||||
Self::Punctuation(token) => token.span(),
|
Self::Punctuation(token) => token.span(),
|
||||||
Self::Numeric(token) => token.span(),
|
Self::Numeric(token) => token.span(),
|
||||||
Self::Comment(token) => token.span(),
|
Self::Comment(token) => token.span(),
|
||||||
|
Self::LiteralCommand(token) => token.span(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -200,6 +203,26 @@ impl SourceElement for Comment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Represents a hardcoded literal command in the source code.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct LiteralCommand {
|
||||||
|
/// Span that makes up the token.
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for LiteralCommand {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.span.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl LiteralCommand {
|
||||||
|
/// Returns the command without the leading slash.
|
||||||
|
#[must_use]
|
||||||
|
pub fn clean_command(&self) -> &str {
|
||||||
|
&self.span.str().trim()[1..]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Is an error that can occur when invoking the [`Token::tokenize`] method.
|
/// Is an error that can occur when invoking the [`Token::tokenize`] method.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)]
|
||||||
#[allow(missing_docs)]
|
#[allow(missing_docs)]
|
||||||
|
@ -283,6 +306,7 @@ impl Token {
|
||||||
iter: &mut SourceIterator,
|
iter: &mut SourceIterator,
|
||||||
start: usize,
|
start: usize,
|
||||||
character: char,
|
character: char,
|
||||||
|
prev_token: Option<&Self>,
|
||||||
handler: &impl Handler<Error>,
|
handler: &impl Handler<Error>,
|
||||||
) -> Result<Self, TokenizeError> {
|
) -> Result<Self, TokenizeError> {
|
||||||
// Single line comment
|
// Single line comment
|
||||||
|
@ -341,6 +365,10 @@ impl Token {
|
||||||
return Err(TokenizeError::FatalLexicalError);
|
return Err(TokenizeError::FatalLexicalError);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// When there is no second slash and at the start of a line
|
||||||
|
else if prev_token.map_or(true, |token| token.span().str().contains('\n')) {
|
||||||
|
Ok(Self::handle_literal_command(iter, start))
|
||||||
|
}
|
||||||
// Just a single slash punctuation
|
// Just a single slash punctuation
|
||||||
else {
|
else {
|
||||||
Ok(Punctuation {
|
Ok(Punctuation {
|
||||||
|
@ -362,6 +390,16 @@ impl Token {
|
||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Handles a command that is preceeded by a slash
|
||||||
|
fn handle_literal_command(iter: &mut SourceIterator, start: usize) -> Self {
|
||||||
|
Self::walk_iter(iter, |c| !(c.is_whitespace() && c.is_ascii_control()));
|
||||||
|
|
||||||
|
LiteralCommand {
|
||||||
|
span: Self::create_span(start, iter),
|
||||||
|
}
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
|
||||||
/// Lexes the source code from the given iterator.
|
/// Lexes the source code from the given iterator.
|
||||||
///
|
///
|
||||||
/// The tokenization starts at the current location of the iterator. The function moves the
|
/// The tokenization starts at the current location of the iterator. The function moves the
|
||||||
|
@ -375,6 +413,7 @@ impl Token {
|
||||||
pub fn tokenize(
|
pub fn tokenize(
|
||||||
iter: &mut SourceIterator,
|
iter: &mut SourceIterator,
|
||||||
handler: &impl Handler<Error>,
|
handler: &impl Handler<Error>,
|
||||||
|
prev_token: Option<&Self>,
|
||||||
) -> Result<Self, TokenizeError> {
|
) -> Result<Self, TokenizeError> {
|
||||||
// Gets the first character
|
// Gets the first character
|
||||||
let (start, character) = iter
|
let (start, character) = iter
|
||||||
|
@ -391,7 +430,7 @@ impl Token {
|
||||||
}
|
}
|
||||||
// Found comment/single slash punctuation
|
// Found comment/single slash punctuation
|
||||||
else if character == '/' {
|
else if character == '/' {
|
||||||
Self::handle_comment(iter, start, character, handler)
|
Self::handle_comment(iter, start, character, prev_token, handler)
|
||||||
}
|
}
|
||||||
// Found numeric literal
|
// Found numeric literal
|
||||||
else if character.is_ascii_digit() {
|
else if character.is_ascii_digit() {
|
||||||
|
|
|
@ -47,7 +47,7 @@ impl TokenStream {
|
||||||
|
|
||||||
// Tokenize the source code.
|
// Tokenize the source code.
|
||||||
loop {
|
loop {
|
||||||
match Token::tokenize(&mut source_file_iterator, handler) {
|
match Token::tokenize(&mut source_file_iterator, handler, tokens.last()) {
|
||||||
Ok(token) => tokens.push(token),
|
Ok(token) => tokens.push(token),
|
||||||
Err(TokenizeError::EndOfSourceCodeIteratorArgument) => {
|
Err(TokenizeError::EndOfSourceCodeIteratorArgument) => {
|
||||||
break;
|
break;
|
||||||
|
|
22
src/lib.rs
22
src/lib.rs
|
@ -3,25 +3,24 @@
|
||||||
//! `ShulkerScript` is a simple, imperative scripting language for creating Minecraft data packs.
|
//! `ShulkerScript` is a simple, imperative scripting language for creating Minecraft data packs.
|
||||||
|
|
||||||
#![deny(
|
#![deny(
|
||||||
missing_docs,
|
|
||||||
missing_debug_implementations,
|
missing_debug_implementations,
|
||||||
missing_copy_implementations,
|
missing_copy_implementations,
|
||||||
clippy::all,
|
|
||||||
clippy::pedantic,
|
|
||||||
clippy::nursery,
|
clippy::nursery,
|
||||||
rustdoc::broken_intra_doc_links,
|
rustdoc::broken_intra_doc_links,
|
||||||
clippy::missing_errors_doc
|
clippy::missing_errors_doc
|
||||||
)]
|
)]
|
||||||
|
#![warn(missing_docs, clippy::all, clippy::pedantic)]
|
||||||
#![allow(clippy::missing_panics_doc, clippy::missing_const_for_fn)]
|
#![allow(clippy::missing_panics_doc, clippy::missing_const_for_fn)]
|
||||||
|
|
||||||
pub mod base;
|
pub mod base;
|
||||||
pub mod lexical;
|
pub mod lexical;
|
||||||
|
pub mod syntax;
|
||||||
|
|
||||||
use std::{cell::Cell, fmt::Display, path::PathBuf};
|
use std::{cell::Cell, fmt::Display, path::PathBuf};
|
||||||
|
|
||||||
use base::{source_file::SourceFile, Handler, Result};
|
use base::{source_file::SourceFile, Handler, Result};
|
||||||
|
|
||||||
use crate::{base::Error, lexical::token_stream::TokenStream};
|
use crate::{base::Error, lexical::token_stream::TokenStream, syntax::parser::Parser};
|
||||||
|
|
||||||
/// Compiles the given source code.
|
/// Compiles the given source code.
|
||||||
///
|
///
|
||||||
|
@ -34,14 +33,25 @@ pub fn compile(path: PathBuf) -> Result<()> {
|
||||||
|
|
||||||
let tokens = TokenStream::tokenize(&source_file, &printer);
|
let tokens = TokenStream::tokenize(&source_file, &printer);
|
||||||
|
|
||||||
println!("{tokens:#?}");
|
|
||||||
|
|
||||||
if printer.has_printed() {
|
if printer.has_printed() {
|
||||||
return Err(Error::Other(
|
return Err(Error::Other(
|
||||||
"An error occurred while tokenizing the source code.",
|
"An error occurred while tokenizing the source code.",
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut parser = Parser::new(&tokens);
|
||||||
|
let result = parser.parse_program(&printer).ok_or(Error::Other(
|
||||||
|
"An error occured while parsing the source code.",
|
||||||
|
))?;
|
||||||
|
|
||||||
|
println!("result: {result:#?}");
|
||||||
|
|
||||||
|
if printer.has_printed() {
|
||||||
|
return Err(Error::Other(
|
||||||
|
"An error occurred while parsing the source code.",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,84 @@
|
||||||
|
//! Contains the error types that can occur while parsing the syntax of the language.
|
||||||
|
|
||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
base::log::{Message, Severity, SourceCodeDisplay},
|
||||||
|
lexical::token::{KeywordKind, Token},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Enumeration containing all kinds of syntax that can be failed to parse.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub enum SyntaxKind {
|
||||||
|
Punctuation(char),
|
||||||
|
Keyword(KeywordKind),
|
||||||
|
Identifier,
|
||||||
|
Declaration,
|
||||||
|
Numeric,
|
||||||
|
Statement,
|
||||||
|
Expression,
|
||||||
|
Type,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A syntax/token is expected but found an other invalid token.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct UnexpectedSyntax {
|
||||||
|
/// The kind of syntax that was expected.
|
||||||
|
pub expected: SyntaxKind,
|
||||||
|
|
||||||
|
/// The invalid token that was found.
|
||||||
|
pub found: Option<Token>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for UnexpectedSyntax {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let expected_binding = match self.expected {
|
||||||
|
SyntaxKind::Identifier => "an identifier token".to_string(),
|
||||||
|
SyntaxKind::Punctuation(char) => format!("a punctuation token `{char}`"),
|
||||||
|
SyntaxKind::Keyword(keyword) => format!("a keyword token `{}`", keyword.as_str()),
|
||||||
|
SyntaxKind::Declaration => "a declaration token".to_string(),
|
||||||
|
SyntaxKind::Numeric => "a numeric token".to_string(),
|
||||||
|
SyntaxKind::Statement => "a statement syntax".to_string(),
|
||||||
|
SyntaxKind::Expression => "an expression syntax".to_string(),
|
||||||
|
SyntaxKind::Type => "a type syntax".to_string(),
|
||||||
|
};
|
||||||
|
let found_binding = match self.found.clone() {
|
||||||
|
Some(Token::Comment(..)) => "a comment token".to_string(),
|
||||||
|
Some(Token::Identifier(..)) => "an identifier token".to_string(),
|
||||||
|
Some(Token::Keyword(keyword)) => {
|
||||||
|
format!("a keyword token `{}`", keyword.keyword.as_str())
|
||||||
|
}
|
||||||
|
Some(Token::WhiteSpaces(..)) => "a white spaces token".to_string(),
|
||||||
|
Some(Token::Punctuation(punctuation)) => {
|
||||||
|
format!("a punctuation token `{}`", punctuation.punctuation)
|
||||||
|
}
|
||||||
|
Some(Token::Numeric(..)) => "a numeric token".to_string(),
|
||||||
|
Some(Token::LiteralCommand(..)) => "a literal command token".to_string(),
|
||||||
|
|
||||||
|
None => "EOF".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let message = format!("expected {expected_binding}, but found {found_binding}");
|
||||||
|
|
||||||
|
write!(f, "{}", Message::new(Severity::Error, message))?;
|
||||||
|
|
||||||
|
self.found.as_ref().map_or(Ok(()), |span| {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"\n{}",
|
||||||
|
SourceCodeDisplay::new(span.span(), Option::<i32>::None)
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for UnexpectedSyntax {}
|
||||||
|
|
||||||
|
/// An enumeration containing all kinds of syntactic errors that can occur while parsing the
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error("{0}")]
|
||||||
|
UnexpectedSyntax(#[from] UnexpectedSyntax),
|
||||||
|
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
//! This module contains the syntax tree and parser for the `ShulkerScript` language.
|
||||||
|
|
||||||
|
pub mod error;
|
||||||
|
pub mod parser;
|
||||||
|
#[allow(clippy::module_name_repetitions)]
|
||||||
|
pub mod syntax_tree;
|
|
@ -0,0 +1,460 @@
|
||||||
|
//! Provides a way to parse a token stream into an abstract syntax tree.
|
||||||
|
|
||||||
|
use derive_more::{Deref, DerefMut};
|
||||||
|
use enum_as_inner::EnumAsInner;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
base::Handler,
|
||||||
|
lexical::{
|
||||||
|
token::{Identifier, Keyword, KeywordKind, Numeric, Punctuation, Token},
|
||||||
|
token_stream::{Delimited, Delimiter, TokenStream, TokenTree},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::error::{Error, SyntaxKind, UnexpectedSyntax};
|
||||||
|
|
||||||
|
/// Represents a parser that reads a token stream and constructs an abstract syntax tree.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut)]
|
||||||
|
pub struct Parser<'a> {
|
||||||
|
#[deref]
|
||||||
|
#[deref_mut]
|
||||||
|
current_frame: Frame<'a>,
|
||||||
|
stack: Vec<Frame<'a>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
|
||||||
|
/// Creates a new parser from the given token stream.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(token_stream: &'a TokenStream) -> Self {
|
||||||
|
Self {
|
||||||
|
current_frame: Frame {
|
||||||
|
token_provider: TokenProvider::TokenStream(token_stream),
|
||||||
|
current_index: 0,
|
||||||
|
},
|
||||||
|
stack: Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Steps into the [`Delimited`] token stream and parses the content within the delimiters.
|
||||||
|
///
|
||||||
|
/// The parser's position must be at the delimited token stream.
|
||||||
|
pub fn step_into<T>(
|
||||||
|
&mut self,
|
||||||
|
delimiter: Delimiter,
|
||||||
|
f: impl FnOnce(&mut Self) -> Option<T>,
|
||||||
|
handler: &dyn Handler<Error>,
|
||||||
|
) -> Option<DelimitedTree<T>> {
|
||||||
|
self.current_frame.stop_at_significant();
|
||||||
|
let raw_token_tree = self
|
||||||
|
.current_frame
|
||||||
|
.token_provider
|
||||||
|
.token_stream()
|
||||||
|
.get(self.current_frame.current_index);
|
||||||
|
|
||||||
|
// move after the whole delimited list
|
||||||
|
self.current_frame.forward();
|
||||||
|
|
||||||
|
let expected = match delimiter {
|
||||||
|
Delimiter::Parenthesis => '(',
|
||||||
|
Delimiter::Brace => '{',
|
||||||
|
Delimiter::Bracket => '[',
|
||||||
|
};
|
||||||
|
|
||||||
|
let delimited_stream = if let Some(token_tree) = raw_token_tree {
|
||||||
|
match token_tree {
|
||||||
|
TokenTree::Delimited(delimited_tree) if delimited_tree.delimiter == delimiter => {
|
||||||
|
delimited_tree
|
||||||
|
}
|
||||||
|
found => {
|
||||||
|
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
|
||||||
|
expected: SyntaxKind::Punctuation(expected),
|
||||||
|
found: Some(match found {
|
||||||
|
TokenTree::Token(token) => token.clone(),
|
||||||
|
TokenTree::Delimited(delimited_tree) => {
|
||||||
|
Token::Punctuation(delimited_tree.open.clone())
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
}));
|
||||||
|
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
|
||||||
|
expected: SyntaxKind::Punctuation(expected),
|
||||||
|
found: self.get_reading(None).into_token(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
|
||||||
|
// creates a new frame
|
||||||
|
let new_frame = Frame {
|
||||||
|
token_provider: TokenProvider::Delimited(delimited_stream),
|
||||||
|
current_index: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
// pushes the current frame onto the stack and replaces the current frame with the new one
|
||||||
|
self.stack
|
||||||
|
.push(std::mem::replace(&mut self.current_frame, new_frame));
|
||||||
|
|
||||||
|
let open = delimited_stream.open.clone();
|
||||||
|
|
||||||
|
let tree = f(self);
|
||||||
|
|
||||||
|
// pops the current frame off the stack
|
||||||
|
let new_frame = self.stack.pop()?;
|
||||||
|
|
||||||
|
// the current frame must be at the end
|
||||||
|
if !self.current_frame.is_exhausted() {
|
||||||
|
let expected = match self
|
||||||
|
.current_frame
|
||||||
|
.token_provider
|
||||||
|
.as_delimited()
|
||||||
|
.unwrap()
|
||||||
|
.delimiter
|
||||||
|
{
|
||||||
|
Delimiter::Parenthesis => ')',
|
||||||
|
Delimiter::Brace => '}',
|
||||||
|
Delimiter::Bracket => ']',
|
||||||
|
};
|
||||||
|
|
||||||
|
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
|
||||||
|
expected: SyntaxKind::Punctuation(expected),
|
||||||
|
found: self.peek().into_token(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let close_punctuation = self
|
||||||
|
.current_frame
|
||||||
|
.token_provider
|
||||||
|
.as_delimited()
|
||||||
|
.unwrap()
|
||||||
|
.close
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
// replaces the current frame with the popped one
|
||||||
|
self.current_frame = new_frame;
|
||||||
|
|
||||||
|
Some(DelimitedTree {
|
||||||
|
open,
|
||||||
|
tree,
|
||||||
|
close: close_punctuation,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a result of [`Parser::step_into()`] function.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct DelimitedTree<T> {
|
||||||
|
/// The opening delimiter.
|
||||||
|
pub open: Punctuation,
|
||||||
|
|
||||||
|
/// The tree inside the delimiter.
|
||||||
|
pub tree: Option<T>,
|
||||||
|
|
||||||
|
/// The closing delimiter.
|
||||||
|
pub close: Punctuation,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Provides a way to iterate over a token stream.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, EnumAsInner)]
|
||||||
|
pub enum TokenProvider<'a> {
|
||||||
|
/// Iterating at the top level of the token stream.
|
||||||
|
TokenStream(&'a TokenStream),
|
||||||
|
|
||||||
|
/// Iterating inside a delimited token stream.
|
||||||
|
Delimited(&'a Delimited),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> TokenProvider<'a> {
|
||||||
|
/// Gets the token stream of the current token provider.
|
||||||
|
#[must_use]
|
||||||
|
pub fn token_stream(&self) -> &'a TokenStream {
|
||||||
|
match self {
|
||||||
|
TokenProvider::TokenStream(token_stream) => token_stream,
|
||||||
|
TokenProvider::Delimited(delimited) => &delimited.token_stream,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a single frame of the parser's stack, responsible for reading a token stream in
|
||||||
|
/// that given token stream level.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Frame<'a> {
|
||||||
|
token_provider: TokenProvider<'a>,
|
||||||
|
current_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Frame<'a> {
|
||||||
|
/// Checks if the current [`Frame`] doesn't have any more significant [`TokenTree`]s to
|
||||||
|
/// parse.
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_exhausted(&self) -> bool {
|
||||||
|
let token_stream = self.token_provider.token_stream();
|
||||||
|
for i in self.current_index..self.token_provider.token_stream().len() {
|
||||||
|
if !matches!(
|
||||||
|
token_stream.get(i),
|
||||||
|
Some(TokenTree::Token(
|
||||||
|
Token::WhiteSpaces(..) | Token::Comment(..)
|
||||||
|
))
|
||||||
|
) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks if the current [`Frame`] has reached the end of the [`TokenStream`].
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_end(&self) -> bool {
|
||||||
|
self.current_index >= self.token_provider.token_stream().len()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_reading(&self, token: Option<&TokenTree>) -> Reading {
|
||||||
|
token.map_or_else(
|
||||||
|
|| match self.token_provider {
|
||||||
|
// end of file
|
||||||
|
TokenProvider::TokenStream(..) => Reading::Eof,
|
||||||
|
TokenProvider::Delimited(delimited) => {
|
||||||
|
Reading::DelimitedEnd(delimited.close.clone())
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|token| match token {
|
||||||
|
TokenTree::Token(token) => Reading::Atomic(token.clone()),
|
||||||
|
TokenTree::Delimited(delimited) => Reading::IntoDelimited(delimited.open.clone()),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a [`Token`] pointing by the `current_index` of the [`Frame`].
|
||||||
|
#[must_use]
|
||||||
|
pub fn peek(&self) -> Reading {
|
||||||
|
self.get_reading(self.token_provider.token_stream().get(self.current_index))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a [`Token`] pointing by the `current_index` with the given index offset of the
|
||||||
|
/// [`Frame`].
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// `None` if `offset + current_index` is less than zero or greter than
|
||||||
|
/// `self.token_provider.token_stream().len() + 1`
|
||||||
|
#[must_use]
|
||||||
|
pub fn peek_offset(&self, offset: isize) -> Option<Reading> {
|
||||||
|
let index = self.current_index.checked_add(offset.try_into().ok()?)?;
|
||||||
|
|
||||||
|
if index > self.token_provider.token_stream().len() + 1 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(self.get_reading(self.token_provider.token_stream().get(index)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a [`Token`] pointing by the `current_index` of the [`Frame`] and increments the
|
||||||
|
/// `current_index` by 1.
|
||||||
|
pub fn next_token(&mut self) -> Reading {
|
||||||
|
let token = self.peek();
|
||||||
|
|
||||||
|
// increment the index
|
||||||
|
self.forward();
|
||||||
|
|
||||||
|
token
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forwards the `current_index` by 1 if the [`Frame`] is not exhausted.
|
||||||
|
pub fn forward(&mut self) {
|
||||||
|
// increment the index
|
||||||
|
if !self.is_end() {
|
||||||
|
self.current_index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Skips any insignificant [`Token`]s, returns the next significant [`Token`] found, and
|
||||||
|
/// increments the `current_index` afterward.
|
||||||
|
pub fn next_significant_token(&mut self) -> Reading {
|
||||||
|
let token = self.stop_at_significant();
|
||||||
|
|
||||||
|
// increment the index
|
||||||
|
self.forward();
|
||||||
|
|
||||||
|
token
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Makes the current [`Frame`] point to the significant [`Token`] if currently not.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// The significant [`Token`] if found, otherwise `None`.
|
||||||
|
pub fn stop_at_significant(&mut self) -> Reading {
|
||||||
|
while !self.is_end() {
|
||||||
|
let token = self.peek();
|
||||||
|
|
||||||
|
if !matches!(
|
||||||
|
token,
|
||||||
|
Reading::Atomic(Token::WhiteSpaces(..) | Token::Comment(..))
|
||||||
|
) {
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.forward();
|
||||||
|
}
|
||||||
|
|
||||||
|
match self.token_provider {
|
||||||
|
TokenProvider::TokenStream(..) => Reading::Eof,
|
||||||
|
TokenProvider::Delimited(delimited) => Reading::DelimitedEnd(delimited.close.clone()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Makes the current position stops at the first token that satisfies the predicate.
|
||||||
|
pub fn stop_at(&mut self, predicate: impl Fn(&Reading) -> bool) -> Reading {
|
||||||
|
while !self.is_end() {
|
||||||
|
let token = self.peek();
|
||||||
|
|
||||||
|
if predicate(&token) {
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.current_index += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
match self.token_provider {
|
||||||
|
TokenProvider::TokenStream(..) => Reading::Eof,
|
||||||
|
TokenProvider::Delimited(delimited) => Reading::DelimitedEnd(delimited.close.clone()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expects the next [`Token`] to be an [`Identifier`], and returns it.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// If the next [`Token`] is not an [`Identifier`].
|
||||||
|
pub fn parse_identifier(&mut self, handler: &impl Handler<Error>) -> Option<Identifier> {
|
||||||
|
match self.next_significant_token() {
|
||||||
|
Reading::Atomic(Token::Identifier(ident)) => Some(ident),
|
||||||
|
found => {
|
||||||
|
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
|
||||||
|
expected: SyntaxKind::Identifier,
|
||||||
|
found: found.into_token(),
|
||||||
|
}));
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expects the next [`Token`] to be an [`Numeric`], and returns it.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// If the next [`Token`] is not an [`Identifier`].
|
||||||
|
pub fn parse_numeric(&mut self, handler: &dyn Handler<Error>) -> Option<Numeric> {
|
||||||
|
match self.next_significant_token() {
|
||||||
|
Reading::Atomic(Token::Numeric(ident)) => Some(ident),
|
||||||
|
found => {
|
||||||
|
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
|
||||||
|
expected: SyntaxKind::Numeric,
|
||||||
|
found: found.into_token(),
|
||||||
|
}));
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expects the next [`Token`] to be a [`Keyword`] of specific kind, and returns it.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// If the next [`Token`] is not a [`Keyword`] of specific kind.
|
||||||
|
pub fn parse_keyword(
|
||||||
|
&mut self,
|
||||||
|
expected: KeywordKind,
|
||||||
|
handler: &dyn Handler<Error>,
|
||||||
|
) -> Option<Keyword> {
|
||||||
|
match self.next_significant_token() {
|
||||||
|
Reading::Atomic(Token::Keyword(keyword_token)) if keyword_token.keyword == expected => {
|
||||||
|
Some(keyword_token)
|
||||||
|
}
|
||||||
|
found => {
|
||||||
|
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
|
||||||
|
expected: SyntaxKind::Keyword(expected),
|
||||||
|
found: found.into_token(),
|
||||||
|
}));
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expects the next [`Token`] to be a [`Punctuation`] of specific kind, and returns it.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// If the next [`Token`] is not a [`Punctuation`] of specific kind.
|
||||||
|
pub fn parse_punctuation(
|
||||||
|
&mut self,
|
||||||
|
expected: char,
|
||||||
|
skip_insignificant: bool,
|
||||||
|
handler: &dyn Handler<Error>,
|
||||||
|
) -> Option<Punctuation> {
|
||||||
|
match if skip_insignificant {
|
||||||
|
self.next_significant_token()
|
||||||
|
} else {
|
||||||
|
self.next_token()
|
||||||
|
} {
|
||||||
|
Reading::Atomic(Token::Punctuation(punctuation_token))
|
||||||
|
if punctuation_token.punctuation == expected =>
|
||||||
|
{
|
||||||
|
Some(punctuation_token)
|
||||||
|
}
|
||||||
|
found => {
|
||||||
|
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
|
||||||
|
expected: SyntaxKind::Punctuation(expected),
|
||||||
|
found: found.into_token(),
|
||||||
|
}));
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tries to parse the given function, and if it fails, resets the current index to the
|
||||||
|
/// `current_index` before the function call.
|
||||||
|
pub fn try_parse<T>(&mut self, f: impl FnOnce(&mut Self) -> Option<T>) -> Option<T> {
|
||||||
|
let current_index = self.current_index;
|
||||||
|
|
||||||
|
let result = f(self);
|
||||||
|
|
||||||
|
if result.is_none() {
|
||||||
|
self.current_index = current_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents the read value of the [`Frame`].
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub enum Reading {
|
||||||
|
/// A singular token.
|
||||||
|
Atomic(Token),
|
||||||
|
|
||||||
|
/// Found an openning delimiter token, which means that the parser can step into a new
|
||||||
|
/// delimited frame.
|
||||||
|
IntoDelimited(Punctuation),
|
||||||
|
|
||||||
|
/// Found a closing delimiter token, which means that the parser should step out of the current
|
||||||
|
/// delimited frame.
|
||||||
|
DelimitedEnd(Punctuation),
|
||||||
|
|
||||||
|
/// End of file.
|
||||||
|
Eof,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Reading {
|
||||||
|
/// Gets the read token inside the [`Reading`] as `Option<Token>`
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// Returns `None` if the [`Reading`] is [`Reading::Eof`].
|
||||||
|
#[must_use]
|
||||||
|
pub fn into_token(self) -> Option<Token> {
|
||||||
|
match self {
|
||||||
|
Self::Atomic(token) => Some(token),
|
||||||
|
Self::IntoDelimited(punc) | Self::DelimitedEnd(punc) => Some(Token::Punctuation(punc)),
|
||||||
|
Self::Eof => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,138 @@
|
||||||
|
//! Syntax tree nodes for declarations.
|
||||||
|
|
||||||
|
#![allow(missing_docs)]
|
||||||
|
|
||||||
|
use getset::Getters;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
base::{
|
||||||
|
source_file::{SourceElement, Span},
|
||||||
|
Handler,
|
||||||
|
},
|
||||||
|
lexical::{
|
||||||
|
token::{Identifier, Keyword, KeywordKind, Punctuation, Token},
|
||||||
|
token_stream::Delimiter,
|
||||||
|
},
|
||||||
|
syntax::{
|
||||||
|
error::{Error, SyntaxKind, UnexpectedSyntax},
|
||||||
|
parser::{Parser, Reading},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{statement::Block, ConnectedList};
|
||||||
|
|
||||||
|
/// A top-level construct of a source file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Declaration {
    /// A function declaration: `function name(params) { ... }`.
    Function(Function),
}
|
||||||
|
|
||||||
|
impl SourceElement for Declaration {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
match self {
|
||||||
|
Self::Function(function) => function.span(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Syntax Synopsis:
///
/// ``` ebnf
/// Function:
///     'function' Identifier '(' ParameterList? ')' Block
///     ;
///
/// ParameterList:
///     Identifier (',' Identifier)* ','?
///     ;
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct Function {
    /// The `function` keyword introducing the declaration.
    #[get = "pub"]
    function_keyword: Keyword,
    /// The name of the function.
    #[get = "pub"]
    identifier: Identifier,
    /// The `(` opening the parameter list.
    #[get = "pub"]
    open_paren: Punctuation,
    /// The comma-separated parameter names; `None` when the list is empty.
    #[get = "pub"]
    parameters: Option<ConnectedList<Identifier, Punctuation>>,
    /// The `)` closing the parameter list.
    #[get = "pub"]
    close_paren: Punctuation,
    /// The function body.
    #[get = "pub"]
    block: Block,
}
|
||||||
|
|
||||||
|
impl Function {
|
||||||
|
/// Dissolves the [`Function`] into its components.
|
||||||
|
#[must_use]
|
||||||
|
pub fn dissolve(
|
||||||
|
self,
|
||||||
|
) -> (
|
||||||
|
Keyword,
|
||||||
|
Identifier,
|
||||||
|
Punctuation,
|
||||||
|
Option<ConnectedList<Identifier, Punctuation>>,
|
||||||
|
Punctuation,
|
||||||
|
Block,
|
||||||
|
) {
|
||||||
|
(
|
||||||
|
self.function_keyword,
|
||||||
|
self.identifier,
|
||||||
|
self.open_paren,
|
||||||
|
self.parameters,
|
||||||
|
self.close_paren,
|
||||||
|
self.block,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Function {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.function_keyword.span.join(&self.block.span()).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
    /// Parses a [`Declaration`] at the current parser position.
    ///
    /// Currently only function declarations (`function name(params) { ... }`)
    /// are recognized. On an unexpected reading the parser is advanced by one
    /// step (so the caller can make progress), an [`Error::UnexpectedSyntax`]
    /// is reported to `handler`, and `None` is returned.
    pub fn parse_declaration(&mut self, handler: &impl Handler<Error>) -> Option<Declaration> {
        match self.stop_at_significant() {
            Reading::Atomic(Token::Keyword(function_keyword))
                if function_keyword.keyword == KeywordKind::Function =>
            {
                // eat the function keyword
                self.forward();

                // parse the identifier
                let identifier = self.parse_identifier(handler)?;
                // parse the parenthesized, comma-separated parameter list
                let delimited_tree = self.parse_enclosed_list(
                    Delimiter::Parenthesis,
                    ',',
                    |parser: &mut Parser<'_>| parser.parse_identifier(handler),
                    handler,
                )?;

                // parse the block
                let block = self.parse_block(handler)?;

                Some(Declaration::Function(Function {
                    function_keyword,
                    identifier,
                    open_paren: delimited_tree.open,
                    parameters: delimited_tree.list,
                    close_paren: delimited_tree.close,
                    block,
                }))
            }

            unexpected => {
                // make progress so the caller's loop cannot spin on the same token
                self.forward();

                handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
                    expected: SyntaxKind::Declaration,
                    found: unexpected.into_token(),
                }));

                None
            }
        }
    }
}
|
|
@ -0,0 +1 @@
|
||||||
|
//! Syntax tree nodes for expressions.
|
|
@ -0,0 +1,191 @@
|
||||||
|
//! Contains the syntax tree nodes that represent the structure of the source code.
|
||||||
|
|
||||||
|
use getset::Getters;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
base::{
|
||||||
|
source_file::{SourceElement, Span},
|
||||||
|
Handler,
|
||||||
|
},
|
||||||
|
lexical::{
|
||||||
|
token::{Punctuation, Token},
|
||||||
|
token_stream::Delimiter,
|
||||||
|
},
|
||||||
|
syntax::parser::Reading,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{error::Error, parser::Parser};
|
||||||
|
|
||||||
|
pub mod declaration;
|
||||||
|
pub mod expression;
|
||||||
|
pub mod program;
|
||||||
|
pub mod statement;
|
||||||
|
|
||||||
|
/// Represents a syntax tree node with a pattern of syntax tree nodes separated by a separator.
///
/// This struct is useful for representing syntax tree nodes that are separated by a separator.
/// For example, a comma separated list of expressions such as `1, 2, 3` can be represented by a
/// [`ConnectedList`] with the separator being a comma token and the elements being the expressions.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct ConnectedList<Element, Separator> {
    /// The first element of the list.
    ///
    /// A [`ConnectedList`] always contains at least this element.
    #[get = "pub"]
    first: Element,

    /// The rest of the elements of the list.
    ///
    /// Each element of the list is a tuple containing the separator and the element. The separator
    /// is the token/syntax tree node that separates the current element from the prior one.
    #[get = "pub"]
    rest: Vec<(Separator, Element)>,

    /// The trailing separator of the list.
    ///
    /// `None` when the list does not end with a dangling separator.
    #[get = "pub"]
    trailing_separator: Option<Separator>,
}
|
||||||
|
|
||||||
|
/// Represents a syntax tree node with a pattern of having [`ConnectedList`] delimited by a pair of
/// punctuation like such `(a, b, c)`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DelimitedList<T> {
    /// The open punctuation of the list.
    pub open: Punctuation,

    /// The list of elements of the list.
    ///
    /// If `None` then the list is empty (or immediately closed after the open punctuation).
    pub list: Option<ConnectedList<T, Punctuation>>,

    /// The close punctuation of the list.
    pub close: Punctuation,
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
|
||||||
|
/// Parses a list of elements enclosed by a pair of delimiters, separated by a separator.
|
||||||
|
///
|
||||||
|
/// The parser position must be at the delimited list of the given delimiter. It will
|
||||||
|
/// consume the whole delimited list and move the next token after the list.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// - if the parser position is not at the delimited list of the given delimiter.
|
||||||
|
/// - any error returned by the given parser function.
|
||||||
|
pub fn parse_enclosed_list<T>(
|
||||||
|
&mut self,
|
||||||
|
delimiter: Delimiter,
|
||||||
|
separator: char,
|
||||||
|
mut f: impl FnMut(&mut Self) -> Option<T>,
|
||||||
|
handler: &impl Handler<Error>,
|
||||||
|
) -> Option<DelimitedList<T>> {
|
||||||
|
fn skip_to_next_separator(this: &mut Parser, separator: char) -> Option<Punctuation> {
|
||||||
|
if let Reading::Atomic(Token::Punctuation(punc)) = this.stop_at(|token| {
|
||||||
|
matches!(
|
||||||
|
token, Reading::Atomic(Token::Punctuation(punc))
|
||||||
|
if punc.punctuation == separator
|
||||||
|
)
|
||||||
|
}) {
|
||||||
|
this.forward();
|
||||||
|
Some(punc)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let delimited_tree = self.step_into(
|
||||||
|
delimiter,
|
||||||
|
|parser| {
|
||||||
|
let mut first = None;
|
||||||
|
let mut rest = Vec::new();
|
||||||
|
let mut trailing_separator: Option<Punctuation> = None;
|
||||||
|
|
||||||
|
while !parser.is_exhausted() {
|
||||||
|
let Some(element) = f(parser) else {
|
||||||
|
skip_to_next_separator(parser, separator);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
// adds new element
|
||||||
|
match (&first, &trailing_separator) {
|
||||||
|
(None, None) => {
|
||||||
|
first = Some(element);
|
||||||
|
}
|
||||||
|
(Some(_), Some(separator)) => {
|
||||||
|
rest.push((separator.clone(), element));
|
||||||
|
trailing_separator = None;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// expect separator if not exhausted
|
||||||
|
if !parser.is_exhausted() {
|
||||||
|
let Some(separator) = parser.parse_punctuation(separator, true, handler)
|
||||||
|
else {
|
||||||
|
if let Some(punctuation) = skip_to_next_separator(parser, separator) {
|
||||||
|
trailing_separator = Some(punctuation);
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
trailing_separator = Some(separator);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(first.map(|first| ConnectedList {
|
||||||
|
first,
|
||||||
|
rest,
|
||||||
|
trailing_separator,
|
||||||
|
}))
|
||||||
|
},
|
||||||
|
handler,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Some(DelimitedList {
|
||||||
|
open: delimited_tree.open,
|
||||||
|
list: delimited_tree.tree.unwrap(),
|
||||||
|
close: delimited_tree.close,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Element: SourceElement, Separator: SourceElement> SourceElement
|
||||||
|
for ConnectedList<Element, Separator>
|
||||||
|
{
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
let end = self.trailing_separator.as_ref().map_or_else(
|
||||||
|
|| {
|
||||||
|
self.rest
|
||||||
|
.last()
|
||||||
|
.map_or_else(|| self.first.span(), |(_, element)| element.span())
|
||||||
|
},
|
||||||
|
SourceElement::span,
|
||||||
|
);
|
||||||
|
|
||||||
|
self.first.span().join(&end).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Element, Separator> ConnectedList<Element, Separator> {
|
||||||
|
/// Returns an iterator over the elements of the list.
|
||||||
|
pub fn elements(&self) -> impl Iterator<Item = &Element> {
|
||||||
|
std::iter::once(&self.first).chain(self.rest.iter().map(|(_, element)| element))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator over the elements of the list.
|
||||||
|
pub fn into_elements(self) -> impl Iterator<Item = Element> {
|
||||||
|
std::iter::once(self.first).chain(self.rest.into_iter().map(|(_, element)| element))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets the number of elements in the list.
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.rest.len() + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if the list is empty.
|
||||||
|
///
|
||||||
|
/// The function will never return `false`.
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
//! The program node of the syntax tree.
|
||||||
|
|
||||||
|
use getset::Getters;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
base::Handler,
|
||||||
|
syntax::{
|
||||||
|
error::Error,
|
||||||
|
parser::{Parser, Reading},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::declaration::Declaration;
|
||||||
|
|
||||||
|
/// Program is a collection of declarations.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct Program {
    /// The declarations within the program, in source order.
    #[get = "pub"]
    declarations: Vec<Declaration>,
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
|
||||||
|
/// Parses a [`Program`].
|
||||||
|
pub fn parse_program(&mut self, handler: &impl Handler<Error>) -> Option<Program> {
|
||||||
|
let mut declarations = Vec::new();
|
||||||
|
|
||||||
|
while !self.is_exhausted() {
|
||||||
|
let result = self.parse_declaration(handler);
|
||||||
|
|
||||||
|
#[allow(clippy::option_if_let_else)]
|
||||||
|
if let Some(x) = result {
|
||||||
|
declarations.push(x);
|
||||||
|
} else {
|
||||||
|
self.stop_at(|reading| {
|
||||||
|
matches!(
|
||||||
|
reading,
|
||||||
|
Reading::IntoDelimited(x) if x.punctuation == '{'
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
self.next_token();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Program { declarations })
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,148 @@
|
||||||
|
//! Syntax tree nodes for statements.
|
||||||
|
|
||||||
|
use getset::Getters;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
base::{
|
||||||
|
source_file::{SourceElement, Span},
|
||||||
|
Handler,
|
||||||
|
},
|
||||||
|
lexical::{
|
||||||
|
token::{LiteralCommand, Punctuation, Token},
|
||||||
|
token_stream::Delimiter,
|
||||||
|
},
|
||||||
|
syntax::{
|
||||||
|
error::{Error, SyntaxKind, UnexpectedSyntax},
|
||||||
|
parser::{Parser, Reading},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Syntax Synopsis:
///
/// ``` ebnf
/// Statement:
///     Block
///     | Conditional
///     ;
/// ```
// NOTE(review): the EBNF above lists `Conditional` (still commented out below)
// but not `LiteralCommand` — keep the grammar and the variants in sync.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Statement {
    /// A `{ ... }` block of statements.
    Block(Block),
    /// A single literal command token.
    LiteralCommand(LiteralCommand),
    // Conditional(Conditional),
}
|
||||||
|
|
||||||
|
impl SourceElement for Statement {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
match self {
|
||||||
|
Self::Block(block) => block.span(),
|
||||||
|
Self::LiteralCommand(literal_command) => literal_command.span(),
|
||||||
|
//Self::Conditional(conditional) => conditional.span(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Syntax Synopsis:
///
/// ``` ebnf
/// Block:
///     '{' Statement* '}'
///     ;
/// ```
// NOTE(review): fields are both `pub` and exposed through getset getters;
// one of the two is probably redundant — confirm which access style callers use.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct Block {
    /// The opening brace of the block.
    #[get = "pub"]
    pub open_brace: Punctuation,
    /// The statements within the block.
    #[get = "pub"]
    pub statements: Vec<Statement>,
    /// The closing brace of the block.
    #[get = "pub"]
    pub close_brace: Punctuation,
}
|
||||||
|
|
||||||
|
impl Block {
|
||||||
|
/// Dissolves the [`Block`] into its components.
|
||||||
|
#[must_use]
|
||||||
|
pub fn dissolve(self) -> (Punctuation, Vec<Statement>, Punctuation) {
|
||||||
|
(self.open_brace, self.statements, self.close_brace)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Block {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.open_brace
|
||||||
|
.span()
|
||||||
|
.join(&self.close_brace.span())
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
    /// Parses a [`Block`].
    ///
    /// Steps into the current `{ ... }` delimited frame and parses statements
    /// until the frame is exhausted. A statement that fails to parse is
    /// skipped up to (and past) the next `;` or nested `{` before retrying.
    pub fn parse_block(&mut self, handler: &impl Handler<Error>) -> Option<Block> {
        let token_tree = self.step_into(
            Delimiter::Brace,
            |parser| {
                let mut statements = Vec::new();

                while !parser.is_exhausted() {
                    parser.parse_statement(handler).map_or_else(
                        || {
                            // error recovery
                            parser.stop_at(|reading| matches!(
                                reading,
                                Reading::Atomic(Token::Punctuation(punc)) if punc.punctuation == ';'
                            ) || matches!(
                                reading,
                                Reading::IntoDelimited(punc) if punc.punctuation == '{'
                            ));

                            // goes after the semicolon or the open brace
                            parser.forward();
                        },
                        |statement| statements.push(statement),
                    );
                }

                Some(statements)
            },
            handler,
        )?;

        Some(Block {
            open_brace: token_tree.open,
            statements: token_tree.tree?,
            close_brace: token_tree.close,
        })
    }

    /// Parses a [`Statement`].
    ///
    /// Recognizes either a literal command token or a `{ ... }` block. On any
    /// other reading an [`Error::UnexpectedSyntax`] is reported to `handler`
    /// and `None` is returned without consuming the unexpected token.
    pub fn parse_statement(&mut self, handler: &impl Handler<Error>) -> Option<Statement> {
        match self.stop_at_significant() {
            // literal command statement
            Reading::Atomic(Token::LiteralCommand(command)) => {
                self.forward();
                Some(Statement::LiteralCommand(command))
            }
            // block statement
            Reading::IntoDelimited(open_brace) if open_brace.punctuation == '{' => {
                let block = self.parse_block(handler)?;

                Some(Statement::Block(block))
            }

            // other
            unexpected => {
                handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
                    expected: SyntaxKind::Statement,
                    found: unexpected.into_token(),
                }));

                None
            }
        }
    }
}
|
Loading…
Reference in New Issue