Add parser for syntax tree

This commit is contained in:
Moritz Hölting 2024-03-27 21:39:56 +01:00
parent 830b3b10d9
commit 9d24571b40
14 changed files with 1145 additions and 9 deletions

View File

@ -8,6 +8,7 @@ edition = "2021"
[dependencies] [dependencies]
colored = "2.1.0" colored = "2.1.0"
derive_more = { version = "0.99.17", default-features = false, features = ["deref", "from", "deref_mut"] } derive_more = { version = "0.99.17", default-features = false, features = ["deref", "from", "deref_mut"] }
enum-as-inner = "0.6.0"
getset = "0.1.2" getset = "0.1.2"
strum = { version = "0.26.2", features = ["derive"] } strum = { version = "0.26.2", features = ["derive"] }
strum_macros = "0.26.2" strum_macros = "0.26.2"

View File

@ -8,6 +8,8 @@ pub enum Error {
IoError(#[from] io::Error), IoError(#[from] io::Error),
#[error("An error occured while tokenizing the source code.")] #[error("An error occured while tokenizing the source code.")]
TokenizeError(#[from] crate::lexical::token::TokenizeError), TokenizeError(#[from] crate::lexical::token::TokenizeError),
#[error("An error occurred while parsing the source code.")]
ParseError(#[from] crate::syntax::error::Error),
#[error("An error occurred")] #[error("An error occurred")]
Other(&'static str), Other(&'static str),
} }

View File

@ -71,6 +71,7 @@ impl SourceFile {
SourceIterator { SourceIterator {
source_file: self, source_file: self,
iterator: self.content().char_indices().peekable(), iterator: self.content().char_indices().peekable(),
prev: None,
} }
} }
@ -299,6 +300,9 @@ pub struct SourceIterator<'a> {
#[get_copy = "pub"] #[get_copy = "pub"]
source_file: &'a Arc<SourceFile>, source_file: &'a Arc<SourceFile>,
iterator: Peekable<CharIndices<'a>>, iterator: Peekable<CharIndices<'a>>,
/// Get the previous character that was iterated over.
#[get_copy = "pub"]
prev: Option<(usize, char)>,
} }
impl<'a> SourceIterator<'a> { impl<'a> SourceIterator<'a> {
/// Peek at the next character in the source file. /// Peek at the next character in the source file.
@ -310,7 +314,11 @@ impl<'a> Iterator for SourceIterator<'a> {
type Item = (usize, char); type Item = (usize, char);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.iterator.next() let item = self.iterator.next();
if item.is_some() {
self.prev = item;
}
item
} }
} }

View File

@ -74,6 +74,7 @@ pub enum Token {
Punctuation(Punctuation), Punctuation(Punctuation),
Numeric(Numeric), Numeric(Numeric),
Comment(Comment), Comment(Comment),
LiteralCommand(LiteralCommand),
} }
impl Token { impl Token {
@ -87,6 +88,7 @@ impl Token {
Self::Punctuation(token) => &token.span, Self::Punctuation(token) => &token.span,
Self::Numeric(token) => &token.span, Self::Numeric(token) => &token.span,
Self::Comment(token) => &token.span, Self::Comment(token) => &token.span,
Self::LiteralCommand(token) => &token.span,
} }
} }
} }
@ -100,6 +102,7 @@ impl SourceElement for Token {
Self::Punctuation(token) => token.span(), Self::Punctuation(token) => token.span(),
Self::Numeric(token) => token.span(), Self::Numeric(token) => token.span(),
Self::Comment(token) => token.span(), Self::Comment(token) => token.span(),
Self::LiteralCommand(token) => token.span(),
} }
} }
} }
@ -200,6 +203,26 @@ impl SourceElement for Comment {
} }
} }
/// Represents a hardcoded literal command in the source code.
///
/// The token is created by `Token::handle_literal_command`; its text starts at the leading
/// slash of the command.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LiteralCommand {
/// Span that makes up the token.
pub span: Span,
}
impl SourceElement for LiteralCommand {
fn span(&self) -> Span {
self.span.clone()
}
}
impl LiteralCommand {
    /// Returns the command without the leading slash.
    ///
    /// Surrounding whitespace of the span's text is trimmed first. The slash is removed with
    /// [`str::strip_prefix`] instead of slicing by byte index, so an empty text or a text whose
    /// first character is not `/` is returned as-is (trimmed) rather than causing a panic.
    #[must_use]
    pub fn clean_command(&self) -> &str {
        let command = self.span.str().trim();
        command.strip_prefix('/').unwrap_or(command)
    }
}
/// Is an error that can occur when invoking the [`Token::tokenize`] method. /// Is an error that can occur when invoking the [`Token::tokenize`] method.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)]
#[allow(missing_docs)] #[allow(missing_docs)]
@ -283,6 +306,7 @@ impl Token {
iter: &mut SourceIterator, iter: &mut SourceIterator,
start: usize, start: usize,
character: char, character: char,
prev_token: Option<&Self>,
handler: &impl Handler<Error>, handler: &impl Handler<Error>,
) -> Result<Self, TokenizeError> { ) -> Result<Self, TokenizeError> {
// Single line comment // Single line comment
@ -341,6 +365,10 @@ impl Token {
return Err(TokenizeError::FatalLexicalError); return Err(TokenizeError::FatalLexicalError);
} }
} }
// When there is no second slash and at the start of a line
else if prev_token.map_or(true, |token| token.span().str().contains('\n')) {
Ok(Self::handle_literal_command(iter, start))
}
// Just a single slash punctuation // Just a single slash punctuation
else { else {
Ok(Punctuation { Ok(Punctuation {
@ -362,6 +390,16 @@ impl Token {
.into() .into()
} }
/// Handles a command that is preceded by a slash.
///
/// Hands [`Self::walk_iter`] a predicate that holds for every character that is not both
/// whitespace and an ASCII control character (presumably the iterator is advanced while the
/// predicate holds; confirm against `walk_iter`). The span from `start` up to the iterator's
/// position is then wrapped into a [`LiteralCommand`] token.
fn handle_literal_command(iter: &mut SourceIterator, start: usize) -> Self {
    Self::walk_iter(iter, |c| !c.is_whitespace() || !c.is_ascii_control());

    let span = Self::create_span(start, iter);
    Self::LiteralCommand(LiteralCommand { span })
}
/// Lexes the source code from the given iterator. /// Lexes the source code from the given iterator.
/// ///
/// The tokenization starts at the current location of the iterator. The function moves the /// The tokenization starts at the current location of the iterator. The function moves the
@ -375,6 +413,7 @@ impl Token {
pub fn tokenize( pub fn tokenize(
iter: &mut SourceIterator, iter: &mut SourceIterator,
handler: &impl Handler<Error>, handler: &impl Handler<Error>,
prev_token: Option<&Self>,
) -> Result<Self, TokenizeError> { ) -> Result<Self, TokenizeError> {
// Gets the first character // Gets the first character
let (start, character) = iter let (start, character) = iter
@ -391,7 +430,7 @@ impl Token {
} }
// Found comment/single slash punctuation // Found comment/single slash punctuation
else if character == '/' { else if character == '/' {
Self::handle_comment(iter, start, character, handler) Self::handle_comment(iter, start, character, prev_token, handler)
} }
// Found numeric literal // Found numeric literal
else if character.is_ascii_digit() { else if character.is_ascii_digit() {

View File

@ -47,7 +47,7 @@ impl TokenStream {
// Tokenize the source code. // Tokenize the source code.
loop { loop {
match Token::tokenize(&mut source_file_iterator, handler) { match Token::tokenize(&mut source_file_iterator, handler, tokens.last()) {
Ok(token) => tokens.push(token), Ok(token) => tokens.push(token),
Err(TokenizeError::EndOfSourceCodeIteratorArgument) => { Err(TokenizeError::EndOfSourceCodeIteratorArgument) => {
break; break;

View File

@ -3,25 +3,24 @@
//! `ShulkerScript` is a simple, imperative scripting language for creating Minecraft data packs. //! `ShulkerScript` is a simple, imperative scripting language for creating Minecraft data packs.
#![deny( #![deny(
missing_docs,
missing_debug_implementations, missing_debug_implementations,
missing_copy_implementations, missing_copy_implementations,
clippy::all,
clippy::pedantic,
clippy::nursery, clippy::nursery,
rustdoc::broken_intra_doc_links, rustdoc::broken_intra_doc_links,
clippy::missing_errors_doc clippy::missing_errors_doc
)] )]
#![warn(missing_docs, clippy::all, clippy::pedantic)]
#![allow(clippy::missing_panics_doc, clippy::missing_const_for_fn)] #![allow(clippy::missing_panics_doc, clippy::missing_const_for_fn)]
pub mod base; pub mod base;
pub mod lexical; pub mod lexical;
pub mod syntax;
use std::{cell::Cell, fmt::Display, path::PathBuf}; use std::{cell::Cell, fmt::Display, path::PathBuf};
use base::{source_file::SourceFile, Handler, Result}; use base::{source_file::SourceFile, Handler, Result};
use crate::{base::Error, lexical::token_stream::TokenStream}; use crate::{base::Error, lexical::token_stream::TokenStream, syntax::parser::Parser};
/// Compiles the given source code. /// Compiles the given source code.
/// ///
@ -34,14 +33,25 @@ pub fn compile(path: PathBuf) -> Result<()> {
let tokens = TokenStream::tokenize(&source_file, &printer); let tokens = TokenStream::tokenize(&source_file, &printer);
println!("{tokens:#?}");
if printer.has_printed() { if printer.has_printed() {
return Err(Error::Other( return Err(Error::Other(
"An error occurred while tokenizing the source code.", "An error occurred while tokenizing the source code.",
)); ));
} }
let mut parser = Parser::new(&tokens);
let result = parser.parse_program(&printer).ok_or(Error::Other(
"An error occured while parsing the source code.",
))?;
println!("result: {result:#?}");
if printer.has_printed() {
return Err(Error::Other(
"An error occurred while parsing the source code.",
));
}
Ok(()) Ok(())
} }

84
src/syntax/error.rs Normal file
View File

@ -0,0 +1,84 @@
//! Contains the error types that can occur while parsing the syntax of the language.
use std::fmt::Display;
use crate::{
base::log::{Message, Severity, SourceCodeDisplay},
lexical::token::{KeywordKind, Token},
};
/// Enumeration containing all kinds of syntax that can fail to parse.
///
/// It is used as the `expected` side of an [`UnexpectedSyntax`] error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(missing_docs)]
pub enum SyntaxKind {
/// A punctuation token made up of the given character.
Punctuation(char),
/// A keyword token of the given kind.
Keyword(KeywordKind),
/// An identifier token.
Identifier,
/// A declaration.
Declaration,
/// A numeric token.
Numeric,
/// A statement.
Statement,
/// An expression.
Expression,
/// A type.
Type,
}
/// A syntax/token was expected, but another, invalid token was found instead.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UnexpectedSyntax {
/// The kind of syntax that was expected.
pub expected: SyntaxKind,
/// The invalid token that was found, or `None` when the end of the file was reached.
pub found: Option<Token>,
}
impl Display for UnexpectedSyntax {
    /// Writes an error message of the form `expected <kind>, but found <token>`.
    ///
    /// When a token was found (i.e. the error did not occur at the end of the file), the message
    /// is followed by a new line and a [`SourceCodeDisplay`] of the span of that token.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let expected_binding = match self.expected {
            SyntaxKind::Identifier => "an identifier token".to_string(),
            SyntaxKind::Punctuation(char) => format!("a punctuation token `{char}`"),
            SyntaxKind::Keyword(keyword) => format!("a keyword token `{}`", keyword.as_str()),
            SyntaxKind::Declaration => "a declaration token".to_string(),
            SyntaxKind::Numeric => "a numeric token".to_string(),
            SyntaxKind::Statement => "a statement syntax".to_string(),
            SyntaxKind::Expression => "an expression syntax".to_string(),
            SyntaxKind::Type => "a type syntax".to_string(),
        };

        // The token is only inspected, so it is borrowed rather than cloned.
        let found_binding = match &self.found {
            Some(Token::Comment(..)) => "a comment token".to_string(),
            Some(Token::Identifier(..)) => "an identifier token".to_string(),
            Some(Token::Keyword(keyword)) => {
                format!("a keyword token `{}`", keyword.keyword.as_str())
            }
            Some(Token::WhiteSpaces(..)) => "a white spaces token".to_string(),
            Some(Token::Punctuation(punctuation)) => {
                format!("a punctuation token `{}`", punctuation.punctuation)
            }
            Some(Token::Numeric(..)) => "a numeric token".to_string(),
            Some(Token::LiteralCommand(..)) => "a literal command token".to_string(),
            None => "EOF".to_string(),
        };

        let message = format!("expected {expected_binding}, but found {found_binding}");
        write!(f, "{}", Message::new(Severity::Error, message))?;

        // Only a found token has a location in the source code that can be displayed.
        if let Some(token) = &self.found {
            write!(
                f,
                "\n{}",
                SourceCodeDisplay::new(token.span(), Option::<i32>::None)
            )?;
        }

        Ok(())
    }
}
// Marker implementation; the trait's default methods are used as-is.
impl std::error::Error for UnexpectedSyntax {}
/// An enumeration containing all kinds of syntactic errors that can occur while parsing the
/// syntax of the language.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// A different syntax/token than the expected one was found. The message is the
/// [`UnexpectedSyntax`]'s own display output.
#[error("{0}")]
UnexpectedSyntax(#[from] UnexpectedSyntax),
}

6
src/syntax/mod.rs Normal file
View File

@ -0,0 +1,6 @@
//! This module contains the syntax tree and parser for the `ShulkerScript` language.
pub mod error;
pub mod parser;
#[allow(clippy::module_name_repetitions)]
pub mod syntax_tree;

460
src/syntax/parser.rs Normal file
View File

@ -0,0 +1,460 @@
//! Provides a way to parse a token stream into an abstract syntax tree.
use derive_more::{Deref, DerefMut};
use enum_as_inner::EnumAsInner;
use crate::{
base::Handler,
lexical::{
token::{Identifier, Keyword, KeywordKind, Numeric, Punctuation, Token},
token_stream::{Delimited, Delimiter, TokenStream, TokenTree},
},
};
use super::error::{Error, SyntaxKind, UnexpectedSyntax};
/// Represents a parser that reads a token stream and constructs an abstract syntax tree.
///
/// The parser dereferences to its current [`Frame`] (derived `Deref`/`DerefMut` on
/// `current_frame`), so the frame's reading methods can be called on the parser directly.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut)]
pub struct Parser<'a> {
/// The frame that is currently being read from.
#[deref]
#[deref_mut]
current_frame: Frame<'a>,
/// The enclosing frames that were put aside by [`Parser::step_into`].
stack: Vec<Frame<'a>>,
}
impl<'a> Parser<'a> {
/// Creates a new parser that reads the given token stream from its first token tree.
///
/// The parser starts with an empty frame stack.
#[must_use]
pub fn new(token_stream: &'a TokenStream) -> Self {
    let root_frame = Frame {
        token_provider: TokenProvider::TokenStream(token_stream),
        current_index: 0,
    };

    Self {
        current_frame: root_frame,
        stack: Vec::new(),
    }
}
/// Steps into the [`Delimited`] token stream and parses the content within the delimiters.
///
/// The parser's position must be at the delimited token stream. Insignificant tokens in
/// front of it are skipped. The token tree at that position is consumed in every case, even
/// when it turns out not to be the expected delimited stream.
///
/// `f` is called with the parser reading from the inside of the delimiters. If `f` leaves
/// significant tokens unread, an error expecting the closing delimiter is reported, but the
/// [`DelimitedTree`] is still returned.
///
/// # Errors
/// Reports an [`UnexpectedSyntax`] to the `handler` and returns `None` if the token tree at
/// the parser's position is not a delimited stream of the given `delimiter` (or if there is
/// no token tree left).
pub fn step_into<T>(
&mut self,
delimiter: Delimiter,
f: impl FnOnce(&mut Self) -> Option<T>,
handler: &dyn Handler<Error>,
) -> Option<DelimitedTree<T>> {
self.current_frame.stop_at_significant();
let raw_token_tree = self
.current_frame
.token_provider
.token_stream()
.get(self.current_frame.current_index);
// move after the whole delimited list
self.current_frame.forward();
// the opening character that is named in the error message on a mismatch
let expected = match delimiter {
Delimiter::Parenthesis => '(',
Delimiter::Brace => '{',
Delimiter::Bracket => '[',
};
let delimited_stream = if let Some(token_tree) = raw_token_tree {
match token_tree {
TokenTree::Delimited(delimited_tree) if delimited_tree.delimiter == delimiter => {
delimited_tree
}
// a plain token or a delimited stream of another kind
found => {
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
expected: SyntaxKind::Punctuation(expected),
found: Some(match found {
TokenTree::Token(token) => token.clone(),
TokenTree::Delimited(delimited_tree) => {
Token::Punctuation(delimited_tree.open.clone())
}
}),
}));
return None;
}
}
} else {
// nothing left in the current frame: report the end of the frame as the found token
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
expected: SyntaxKind::Punctuation(expected),
found: self.get_reading(None).into_token(),
}));
return None;
};
// creates a new frame
let new_frame = Frame {
token_provider: TokenProvider::Delimited(delimited_stream),
current_index: 0,
};
// pushes the current frame onto the stack and replaces the current frame with the new one
self.stack
.push(std::mem::replace(&mut self.current_frame, new_frame));
let open = delimited_stream.open.clone();
let tree = f(self);
// pops the enclosing frame off the stack; `current_frame` is still the inner frame here
let new_frame = self.stack.pop()?;
// the current frame must be at the end
if !self.current_frame.is_exhausted() {
let expected = match self
.current_frame
.token_provider
.as_delimited()
.unwrap()
.delimiter
{
Delimiter::Parenthesis => ')',
Delimiter::Brace => '}',
Delimiter::Bracket => ']',
};
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
expected: SyntaxKind::Punctuation(expected),
found: self.peek().into_token(),
}));
}
let close_punctuation = self
.current_frame
.token_provider
.as_delimited()
.unwrap()
.close
.clone();
// replaces the current frame with the popped one
self.current_frame = new_frame;
Some(DelimitedTree {
open,
tree,
close: close_punctuation,
})
}
}
/// Represents a result of [`Parser::step_into()`] function.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DelimitedTree<T> {
/// The opening delimiter.
pub open: Punctuation,
/// The tree inside the delimiter.
///
/// This is whatever the closure given to [`Parser::step_into()`] returned, so it is `None`
/// when that closure failed.
pub tree: Option<T>,
/// The closing delimiter.
pub close: Punctuation,
}
/// Provides a way to iterate over a token stream.
///
/// The variant determines what a [`Frame`] reports once it runs out of token trees: the end of
/// the file at the top level, or the closing delimiter inside a delimited token stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, EnumAsInner)]
pub enum TokenProvider<'a> {
/// Iterating at the top level of the token stream.
TokenStream(&'a TokenStream),
/// Iterating inside a delimited token stream.
Delimited(&'a Delimited),
}
impl<'a> TokenProvider<'a> {
/// Gets the token stream of the current token provider.
#[must_use]
pub fn token_stream(&self) -> &'a TokenStream {
match self {
TokenProvider::TokenStream(token_stream) => token_stream,
TokenProvider::Delimited(delimited) => &delimited.token_stream,
}
}
}
/// Represents a single frame of the parser's stack, responsible for reading a token stream in
/// that given token stream level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Frame<'a> {
/// The token stream (top level or delimited) that this frame reads from.
token_provider: TokenProvider<'a>,
/// The index into the provider's token stream of the next token tree to read.
current_index: usize,
}
impl<'a> Frame<'a> {
/// Checks whether only insignificant [`TokenTree`]s (white spaces and comments) are left in
/// the current [`Frame`], starting from the current index.
#[must_use]
pub fn is_exhausted(&self) -> bool {
    let stream = self.token_provider.token_stream();

    (self.current_index..stream.len()).all(|index| {
        matches!(
            stream.get(index),
            Some(TokenTree::Token(
                Token::WhiteSpaces(..) | Token::Comment(..)
            ))
        )
    })
}
/// Checks whether the current index of the [`Frame`] lies at or past the end of its
/// [`TokenStream`].
#[must_use]
pub fn is_end(&self) -> bool {
    let total = self.token_provider.token_stream().len();
    total <= self.current_index
}
/// Converts an optional [`TokenTree`] into a [`Reading`].
///
/// A missing token tree means the end of the frame: the end of the file at the top level, or
/// the closing delimiter inside a delimited token stream.
fn get_reading(&self, token: Option<&TokenTree>) -> Reading {
    match token {
        Some(TokenTree::Token(token)) => Reading::Atomic(token.clone()),
        Some(TokenTree::Delimited(delimited)) => {
            Reading::IntoDelimited(delimited.open.clone())
        }
        None => match self.token_provider {
            TokenProvider::TokenStream(..) => Reading::Eof,
            TokenProvider::Delimited(delimited) => {
                Reading::DelimitedEnd(delimited.close.clone())
            }
        },
    }
}
/// Returns the [`Reading`] at the `current_index` of the [`Frame`] without moving the index.
#[must_use]
pub fn peek(&self) -> Reading {
    let current = self
        .token_provider
        .token_stream()
        .get(self.current_index);

    self.get_reading(current)
}
/// Returns the [`Reading`] at the `current_index` of the [`Frame`] shifted by the given
/// (possibly negative) offset, without moving the index.
///
/// # Returns
///
/// `None` if `offset + current_index` is less than zero (or overflows) or greater than
/// `self.token_provider.token_stream().len() + 1`
#[must_use]
pub fn peek_offset(&self, offset: isize) -> Option<Reading> {
    let stream = self.token_provider.token_stream();

    // A signed addition keeps negative offsets usable; converting the offset to `usize` first
    // would reject every look-behind, even one that stays within the stream.
    let index = self.current_index.checked_add_signed(offset)?;

    if index > stream.len() + 1 {
        return None;
    }

    Some(self.get_reading(stream.get(index)))
}
/// Returns the [`Reading`] at the `current_index` of the [`Frame`] and then moves the index
/// forward by one (unless the end has been reached).
pub fn next_token(&mut self) -> Reading {
    let reading = self.peek();
    self.forward();

    reading
}
/// Forwards the `current_index` by 1 unless the [`Frame`] has already reached its end.
pub fn forward(&mut self) {
    if self.is_end() {
        return;
    }

    self.current_index += 1;
}
/// Skips white spaces and comments, returns the first significant [`Reading`] found, and
/// moves the `current_index` past it.
pub fn next_significant_token(&mut self) -> Reading {
    let significant = self.stop_at_significant();
    self.forward();

    significant
}
/// Moves the current [`Frame`] forward until it points to a significant token, i.e. one that
/// is neither white space nor a comment. The index is left untouched if it already does.
///
/// # Returns
/// The significant [`Reading`] if found, otherwise the reading for the end of the frame
/// ([`Reading::Eof`] at the top level, [`Reading::DelimitedEnd`] inside delimiters).
pub fn stop_at_significant(&mut self) -> Reading {
    self.stop_at(|reading| {
        !matches!(
            reading,
            Reading::Atomic(Token::WhiteSpaces(..) | Token::Comment(..))
        )
    })
}
/// Makes the current position stops at the first token that satisfies the predicate.
pub fn stop_at(&mut self, predicate: impl Fn(&Reading) -> bool) -> Reading {
while !self.is_end() {
let token = self.peek();
if predicate(&token) {
return token;
}
self.current_index += 1;
}
match self.token_provider {
TokenProvider::TokenStream(..) => Reading::Eof,
TokenProvider::Delimited(delimited) => Reading::DelimitedEnd(delimited.close.clone()),
}
}
/// Expects the next significant [`Token`] to be an [`Identifier`], and returns it.
///
/// The token is consumed whether or not it is an identifier.
///
/// # Errors
/// If the next [`Token`] is not an [`Identifier`], an [`UnexpectedSyntax`] is reported to the
/// `handler` and `None` is returned.
pub fn parse_identifier(&mut self, handler: &impl Handler<Error>) -> Option<Identifier> {
    let reading = self.next_significant_token();

    if let Reading::Atomic(Token::Identifier(identifier)) = reading {
        return Some(identifier);
    }

    handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
        expected: SyntaxKind::Identifier,
        found: reading.into_token(),
    }));
    None
}
/// Expects the next significant [`Token`] to be a [`Numeric`], and returns it.
///
/// The token is consumed whether or not it is a numeric.
///
/// # Errors
/// If the next [`Token`] is not a [`Numeric`], an [`UnexpectedSyntax`] is reported to the
/// `handler` and `None` is returned.
pub fn parse_numeric(&mut self, handler: &dyn Handler<Error>) -> Option<Numeric> {
    let reading = self.next_significant_token();

    if let Reading::Atomic(Token::Numeric(numeric)) = reading {
        return Some(numeric);
    }

    handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
        expected: SyntaxKind::Numeric,
        found: reading.into_token(),
    }));
    None
}
/// Expects the next [`Token`] to be a [`Keyword`] of specific kind, and returns it.
///
/// # Errors
/// If the next [`Token`] is not a [`Keyword`] of specific kind.
pub fn parse_keyword(
&mut self,
expected: KeywordKind,
handler: &dyn Handler<Error>,
) -> Option<Keyword> {
match self.next_significant_token() {
Reading::Atomic(Token::Keyword(keyword_token)) if keyword_token.keyword == expected => {
Some(keyword_token)
}
found => {
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
expected: SyntaxKind::Keyword(expected),
found: found.into_token(),
}));
None
}
}
}
/// Expects the next [`Token`] to be a [`Punctuation`] of specific kind, and returns it.
///
/// # Errors
/// If the next [`Token`] is not a [`Punctuation`] of specific kind.
pub fn parse_punctuation(
&mut self,
expected: char,
skip_insignificant: bool,
handler: &dyn Handler<Error>,
) -> Option<Punctuation> {
match if skip_insignificant {
self.next_significant_token()
} else {
self.next_token()
} {
Reading::Atomic(Token::Punctuation(punctuation_token))
if punctuation_token.punctuation == expected =>
{
Some(punctuation_token)
}
found => {
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
expected: SyntaxKind::Punctuation(expected),
found: found.into_token(),
}));
None
}
}
}
/// Runs the given parse function and, if it fails (returns `None`), resets the
/// `current_index` to the value it had before the call.
pub fn try_parse<T>(&mut self, f: impl FnOnce(&mut Self) -> Option<T>) -> Option<T> {
    let checkpoint = self.current_index;

    f(self).or_else(|| {
        // rewind so that the caller can try something else from the same position
        self.current_index = checkpoint;
        None
    })
}
}
/// Represents the read value of the [`Frame`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Reading {
/// A singular token.
Atomic(Token),
/// Found an opening delimiter token, which means that the parser can step into a new
/// delimited frame.
IntoDelimited(Punctuation),
/// Found a closing delimiter token, which means that the parser should step out of the current
/// delimited frame.
DelimitedEnd(Punctuation),
/// End of file.
Eof,
}
impl Reading {
    /// Converts the [`Reading`] into the token it stands for.
    ///
    /// Opening and closing delimiters are returned as punctuation tokens.
    ///
    /// # Returns
    ///
    /// Returns `None` if the [`Reading`] is [`Reading::Eof`].
    #[must_use]
    pub fn into_token(self) -> Option<Token> {
        let token = match self {
            Self::Eof => return None,
            Self::Atomic(token) => token,
            Self::IntoDelimited(punctuation) | Self::DelimitedEnd(punctuation) => {
                Token::Punctuation(punctuation)
            }
        };

        Some(token)
    }
}

View File

@ -0,0 +1,138 @@
//! Syntax tree nodes for declarations.
#![allow(missing_docs)]
use getset::Getters;
use crate::{
base::{
source_file::{SourceElement, Span},
Handler,
},
lexical::{
token::{Identifier, Keyword, KeywordKind, Punctuation, Token},
token_stream::Delimiter,
},
syntax::{
error::{Error, SyntaxKind, UnexpectedSyntax},
parser::{Parser, Reading},
},
};
use super::{statement::Block, ConnectedList};
/// A top-level declaration, as produced by `Parser::parse_declaration`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Declaration {
/// A function declaration.
Function(Function),
}
impl SourceElement for Declaration {
    /// Returns the span of the wrapped declaration.
    fn span(&self) -> Span {
        // `Function` is the only variant, so the pattern cannot fail.
        let Self::Function(function) = self;
        function.span()
    }
}
/// Syntax Synopsis:
///
/// ``` ebnf
/// Function:
/// 'function' Identifier '(' ParameterList? ')' Block
/// ;
///
/// ParameterList:
/// Identifier (',' Identifier)* ','?
/// ;
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct Function {
/// The `function` keyword that starts the declaration.
#[get = "pub"]
function_keyword: Keyword,
/// The name of the function.
#[get = "pub"]
identifier: Identifier,
/// The opening parenthesis of the parameter list.
#[get = "pub"]
open_paren: Punctuation,
/// The parameters of the function, or `None` if the parameter list is empty.
#[get = "pub"]
parameters: Option<ConnectedList<Identifier, Punctuation>>,
/// The closing parenthesis of the parameter list.
#[get = "pub"]
close_paren: Punctuation,
/// The body of the function.
#[get = "pub"]
block: Block,
}
impl Function {
    /// Dissolves the [`Function`] into its components.
    ///
    /// The components are returned in source order: the `function` keyword, the identifier, the
    /// opening parenthesis, the optional parameters, the closing parenthesis and the block.
    #[must_use]
    pub fn dissolve(
        self,
    ) -> (
        Keyword,
        Identifier,
        Punctuation,
        Option<ConnectedList<Identifier, Punctuation>>,
        Punctuation,
        Block,
    ) {
        let Self {
            function_keyword,
            identifier,
            open_paren,
            parameters,
            close_paren,
            block,
        } = self;

        (
            function_keyword,
            identifier,
            open_paren,
            parameters,
            close_paren,
            block,
        )
    }
}
impl SourceElement for Function {
    /// Returns the span reaching from the `function` keyword to the end of the block.
    fn span(&self) -> Span {
        let start = &self.function_keyword.span;
        let end = self.block.span();

        start.join(&end).unwrap()
    }
}
impl<'a> Parser<'a> {
    /// Parses a [`Declaration`].
    ///
    /// Currently only function declarations are recognized: the `function` keyword, an
    /// identifier, a parenthesized and comma separated list of identifiers, and a block.
    ///
    /// # Errors
    /// Reports to the `handler` and returns `None` if the next significant token is not the
    /// `function` keyword (that token is skipped to make progress) or if any part of the
    /// function fails to parse.
    pub fn parse_declaration(&mut self, handler: &impl Handler<Error>) -> Option<Declaration> {
        let function_keyword = match self.stop_at_significant() {
            Reading::Atomic(Token::Keyword(keyword))
                if keyword.keyword == KeywordKind::Function =>
            {
                keyword
            }
            unexpected => {
                // make progress
                self.forward();

                handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
                    expected: SyntaxKind::Declaration,
                    found: unexpected.into_token(),
                }));

                return None;
            }
        };

        // eat the function keyword
        self.forward();

        // the name of the function
        let identifier = self.parse_identifier(handler)?;

        // the parenthesized parameter list
        let parameter_list = self.parse_enclosed_list(
            Delimiter::Parenthesis,
            ',',
            |parser: &mut Parser<'_>| parser.parse_identifier(handler),
            handler,
        )?;

        // the body of the function
        let block = self.parse_block(handler)?;

        Some(Declaration::Function(Function {
            function_keyword,
            identifier,
            open_paren: parameter_list.open,
            parameters: parameter_list.list,
            close_paren: parameter_list.close,
            block,
        }))
    }
}

View File

@ -0,0 +1 @@
//! Syntax tree nodes for expressions.

View File

@ -0,0 +1,191 @@
//! Contains the syntax tree nodes that represent the structure of the source code.
use getset::Getters;
use crate::{
base::{
source_file::{SourceElement, Span},
Handler,
},
lexical::{
token::{Punctuation, Token},
token_stream::Delimiter,
},
syntax::parser::Reading,
};
use super::{error::Error, parser::Parser};
pub mod declaration;
pub mod expression;
pub mod program;
pub mod statement;
/// Represents a syntax tree node with a pattern of syntax tree nodes separated by a separator.
///
/// This struct is useful for representing syntax tree nodes that are separated by a separator.
/// For example, a comma separated list of expressions such as `1, 2, 3` can be represented by a
/// [`ConnectedList`] with the separator being a comma token and the elements being the expressions.
///
/// A [`ConnectedList`] always contains at least one element (`first`).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct ConnectedList<Element, Separator> {
/// The first element of the list.
#[get = "pub"]
first: Element,
/// The rest of the elements of the list.
///
/// Each element of the list is a tuple containing the separator and the element. The separator
/// is the token/syntax tree node that separates the current element from the prior one.
#[get = "pub"]
rest: Vec<(Separator, Element)>,
/// The trailing separator of the list, i.e. a separator that follows the last element.
#[get = "pub"]
trailing_separator: Option<Separator>,
}
/// Represents a syntax tree node with a pattern of having [`ConnectedList`] delimited by a pair of
/// punctuation like such `(a, b, c)`.
///
/// This is the result of [`Parser::parse_enclosed_list`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DelimitedList<T> {
/// The open punctuation of the list.
pub open: Punctuation,
/// The list of elements of the list.
///
/// If `None` then the list is empty (or immediately closed after the open punctuation).
pub list: Option<ConnectedList<T, Punctuation>>,
/// The close punctuation of the list.
pub close: Punctuation,
}
impl<'a> Parser<'a> {
/// Parses a list of elements enclosed by a pair of delimiters, separated by a separator.
///
/// The parser position must be at the delimited list of the given delimiter. It will
/// consume the whole delimited list and move the next token after the list.
///
/// When an element or a separator fails to parse, the parser skips to just after the next
/// separator and carries on, so a list is still returned for partially invalid input.
///
/// # Errors
/// - if the parser position is not at the delimited list of the given delimiter.
/// - any error returned by the given parser function.
pub fn parse_enclosed_list<T>(
&mut self,
delimiter: Delimiter,
separator: char,
mut f: impl FnMut(&mut Self) -> Option<T>,
handler: &impl Handler<Error>,
) -> Option<DelimitedList<T>> {
// Error recovery helper: moves to the next `separator` punctuation in the current frame,
// consumes it and returns it. Returns `None` (with the frame at its end) if there is none.
fn skip_to_next_separator(this: &mut Parser, separator: char) -> Option<Punctuation> {
if let Reading::Atomic(Token::Punctuation(punc)) = this.stop_at(|token| {
matches!(
token, Reading::Atomic(Token::Punctuation(punc))
if punc.punctuation == separator
)
}) {
this.forward();
Some(punc)
} else {
None
}
}
let delimited_tree = self.step_into(
delimiter,
|parser| {
let mut first = None;
let mut rest = Vec::new();
// the separator read after the most recent element; consumed by the next element
let mut trailing_separator: Option<Punctuation> = None;
while !parser.is_exhausted() {
// a failed element is dropped; parsing resumes after the next separator
let Some(element) = f(parser) else {
skip_to_next_separator(parser, separator);
continue;
};
// adds new element
match (&first, &trailing_separator) {
(None, None) => {
first = Some(element);
}
(Some(_), Some(separator)) => {
rest.push((separator.clone(), element));
trailing_separator = None;
}
// after an element was stored, the loop only continues once a separator was
// found, and a separator is only stored after an element
_ => {
unreachable!()
}
}
// expect separator if not exhausted
if !parser.is_exhausted() {
let Some(separator) = parser.parse_punctuation(separator, true, handler)
else {
// recovery: use the next separator that can be found, if any
if let Some(punctuation) = skip_to_next_separator(parser, separator) {
trailing_separator = Some(punctuation);
}
continue;
};
trailing_separator = Some(separator);
}
}
// `None` inside the `Some` stands for an empty list
Some(first.map(|first| ConnectedList {
first,
rest,
trailing_separator,
}))
},
handler,
)?;
Some(DelimitedList {
open: delimited_tree.open,
// the closure above always returns `Some`
list: delimited_tree.tree.unwrap(),
close: delimited_tree.close,
})
}
}
impl<Element: SourceElement, Separator: SourceElement> SourceElement
    for ConnectedList<Element, Separator>
{
    /// Returns the span reaching from the first element to the end of the list.
    ///
    /// The list ends with the trailing separator if there is one, otherwise with the last
    /// element (which is the first element for a list of a single element).
    fn span(&self) -> Span {
        let last = match (&self.trailing_separator, self.rest.last()) {
            (Some(separator), _) => separator.span(),
            (None, Some((_, element))) => element.span(),
            (None, None) => self.first.span(),
        };

        self.first.span().join(&last).unwrap()
    }
}
impl<Element, Separator> ConnectedList<Element, Separator> {
    /// Returns an iterator over references to the elements of the list, in order.
    pub fn elements(&self) -> impl Iterator<Item = &Element> {
        let rest = self.rest.iter().map(|pair| &pair.1);
        std::iter::once(&self.first).chain(rest)
    }

    /// Consumes the list and returns an iterator over its elements, in order.
    pub fn into_elements(self) -> impl Iterator<Item = Element> {
        let rest = self.rest.into_iter().map(|pair| pair.1);
        std::iter::once(self.first).chain(rest)
    }

    /// Gets the number of elements in the list (separators are not counted).
    pub fn len(&self) -> usize {
        self.rest.len() + 1
    }

    /// Returns `true` if the list is empty.
    ///
    /// The function always returns `false`, since the list contains at least its first element.
    pub fn is_empty(&self) -> bool {
        false
    }
}

View File

@ -0,0 +1,48 @@
//! The program node of the syntax tree.
use getset::Getters;
use crate::{
base::Handler,
syntax::{
error::Error,
parser::{Parser, Reading},
},
};
use super::declaration::Declaration;
/// Program is a collection of declarations.
///
/// It is the result of `Parser::parse_program`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct Program {
/// The declarations within the program, in the order in which they were parsed.
#[get = "pub"]
declarations: Vec<Declaration>,
}
impl<'a> Parser<'a> {
    /// Parses a [`Program`].
    ///
    /// Declarations are parsed until only insignificant tokens are left. A declaration that
    /// fails to parse is left out; the parser then moves on to the next `{` delimited block
    /// and continues after it. The function itself always returns `Some`.
    pub fn parse_program(&mut self, handler: &impl Handler<Error>) -> Option<Program> {
        let mut declarations = Vec::new();

        while !self.is_exhausted() {
            if let Some(declaration) = self.parse_declaration(handler) {
                declarations.push(declaration);
                continue;
            }

            // error recovery: skip up to and including the next brace delimited block
            self.stop_at(|reading| {
                matches!(
                    reading,
                    Reading::IntoDelimited(open) if open.punctuation == '{'
                )
            });
            self.next_token();
        }

        Some(Program { declarations })
    }
}

View File

@ -0,0 +1,148 @@
//! Syntax tree nodes for statements.
use getset::Getters;
use crate::{
base::{
source_file::{SourceElement, Span},
Handler,
},
lexical::{
token::{LiteralCommand, Punctuation, Token},
token_stream::Delimiter,
},
syntax::{
error::{Error, SyntaxKind, UnexpectedSyntax},
parser::{Parser, Reading},
},
};
/// Syntax Synopsis:
///
/// ``` ebnf
/// Statement:
/// Block
/// | LiteralCommand
/// ;
/// ```
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Statement {
/// A nested block of statements.
Block(Block),
/// A literal command token.
LiteralCommand(LiteralCommand),
// Conditional(Conditional),
}
impl SourceElement for Statement {
    /// Returns the span of the wrapped statement.
    fn span(&self) -> Span {
        match self {
            Self::LiteralCommand(command) => command.span(),
            Self::Block(inner) => inner.span(),
        }
    }
}
/// Syntax Synopsis:
///
/// ``` ebnf
/// Block:
/// '{' Statement* '}'
/// ;
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
pub struct Block {
/// The opening brace of the block.
#[get = "pub"]
pub open_brace: Punctuation,
/// The statements within the block, in source order.
#[get = "pub"]
pub statements: Vec<Statement>,
/// The closing brace of the block.
#[get = "pub"]
pub close_brace: Punctuation,
}
impl Block {
    /// Dissolves the [`Block`] into its components: the opening brace, the statements and the
    /// closing brace.
    #[must_use]
    pub fn dissolve(self) -> (Punctuation, Vec<Statement>, Punctuation) {
        let Self {
            open_brace,
            statements,
            close_brace,
        } = self;

        (open_brace, statements, close_brace)
    }
}
impl SourceElement for Block {
    /// Returns the span reaching from the opening brace to the closing brace.
    fn span(&self) -> Span {
        let start = self.open_brace.span();
        let end = self.close_brace.span();

        start.join(&end).unwrap()
    }
}
impl<'a> Parser<'a> {
/// Parses a [`Block`].
///
/// The parser must be positioned at a brace delimited token stream. Statements are parsed
/// until only insignificant tokens are left inside the braces. A statement that fails to
/// parse is left out; the parser then continues after the next `;` or the next brace
/// delimited block.
///
/// # Errors
/// Returns `None` if the parser is not positioned at a brace delimited token stream.
pub fn parse_block(&mut self, handler: &impl Handler<Error>) -> Option<Block> {
    let delimited = self.step_into(
        Delimiter::Brace,
        |parser| {
            let mut statements = Vec::new();

            while !parser.is_exhausted() {
                if let Some(statement) = parser.parse_statement(handler) {
                    statements.push(statement);
                    continue;
                }

                // error recovery
                parser.stop_at(|reading| match reading {
                    Reading::Atomic(Token::Punctuation(punc)) => punc.punctuation == ';',
                    Reading::IntoDelimited(punc) => punc.punctuation == '{',
                    _ => false,
                });

                // goes after the semicolon or the open brace
                parser.forward();
            }

            Some(statements)
        },
        handler,
    )?;

    let statements = delimited.tree?;

    Some(Block {
        open_brace: delimited.open,
        statements,
        close_brace: delimited.close,
    })
}
/// Parses a [`Statement`].
pub fn parse_statement(&mut self, handler: &impl Handler<Error>) -> Option<Statement> {
match self.stop_at_significant() {
// variable declaration
Reading::Atomic(Token::LiteralCommand(command)) => {
self.forward();
Some(Statement::LiteralCommand(command))
}
// block statement
Reading::IntoDelimited(open_brace) if open_brace.punctuation == '{' => {
let block = self.parse_block(handler)?;
Some(Statement::Block(block))
}
// other
unexpected => {
handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {
expected: SyntaxKind::Statement,
found: unexpected.into_token(),
}));
None
}
}
}
}