From b9bc5438e5d682d04ab7b4dbcf7218cc2f3389a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Mon, 1 Apr 2024 20:42:38 +0200 Subject: [PATCH] Implement annotations for functions --- grammar.md | 7 +- src/base/source_file.rs | 8 +- src/lexical/token.rs | 7 +- src/lexical/token_stream.rs | 3 + src/lib.rs | 43 ++++++++- src/syntax/parser.rs | 28 +++--- src/syntax/syntax_tree/declaration.rs | 134 +++++++++++++++++++++++++- 7 files changed, 204 insertions(+), 26 deletions(-) diff --git a/grammar.md b/grammar.md index c0a8ecf..c573a7f 100644 --- a/grammar.md +++ b/grammar.md @@ -15,13 +15,18 @@ Declaration: FunctionDeclaration; ### FunctionDeclaration ```ebnf Function: - 'fn' Identifier '(' ParameterList? ')' Block + Annotation* 'fn' Identifier '(' ParameterList? ')' Block ; ParameterList: Identifier (',' Identifier)* ','? ; ``` +### Annotation +```ebnf +Annotation: '#[' Identifier ('=' StringLiteral)? ']'; +``` + ### Statement ```ebnf Statement: diff --git a/src/base/source_file.rs b/src/base/source_file.rs index 37be4ef..a85241a 100644 --- a/src/base/source_file.rs +++ b/src/base/source_file.rs @@ -6,7 +6,7 @@ use std::{ fs, iter::{Iterator, Peekable}, ops::Range, - path::PathBuf, + path::{Path, PathBuf}, str::CharIndices, sync::Arc, }; @@ -85,9 +85,9 @@ impl SourceFile { /// /// # Errors /// - [`Error::IoError`]: Error occurred when reading the file contents. 
-    pub fn load(path: PathBuf) -> Result<Arc<SourceFile>, Error> {
-        let source = fs::read_to_string(&path).map_err(Error::IoError)?;
-        Ok(Self::new(path, source))
+    pub fn load(path: &Path) -> Result<Arc<SourceFile>, Error> {
+        let source = fs::read_to_string(path).map_err(Error::IoError)?;
+        Ok(Self::new(path.to_path_buf(), source))
     }
 
     /// Get the [`Location`] of a given byte index
diff --git a/src/lexical/token.rs b/src/lexical/token.rs
index 761ff8e..bdf24d9 100644
--- a/src/lexical/token.rs
+++ b/src/lexical/token.rs
@@ -7,6 +7,7 @@ use crate::base::{
     Handler,
 };
 use derive_more::From;
+use enum_as_inner::EnumAsInner;
 use strum::IntoEnumIterator;
 use strum_macros::EnumIter;
 
@@ -64,8 +65,8 @@ impl KeywordKind {
     }
 }
 
-/// Is an enumeration containing all kinds of tokens in the Flux programming language.
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)]
+/// Is an enumeration containing all kinds of tokens in the Shulkerscript programming language.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From, EnumAsInner)]
 #[allow(missing_docs)]
 pub enum Token {
     WhiteSpaces(WhiteSpaces),
@@ -202,7 +203,7 @@ impl SourceElement for StringLiteral {
     }
 }
 
-/// Is an enumeration representing the two kinds of comments in the Flux programming language.
+/// Is an enumeration representing the two kinds of comments in the Shulkerscript programming language.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum CommentKind {
     /// A comment that starts with `//` and ends at the end of the line.
diff --git a/src/lexical/token_stream.rs b/src/lexical/token_stream.rs
index 9d285ac..b001b55 100644
--- a/src/lexical/token_stream.rs
+++ b/src/lexical/token_stream.rs
@@ -173,8 +173,11 @@ pub enum TokenTree {
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[allow(missing_docs)]
 pub enum Delimiter {
+    /// ()
     Parenthesis,
+    /// {}
     Brace,
+    /// []
     Bracket,
 }
 
diff --git a/src/lib.rs b/src/lib.rs
index dfee576..1344b13 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,19 +17,58 @@ pub mod compile;
 pub mod lexical;
 pub mod syntax;
 
-use std::{cell::Cell, fmt::Display, path::PathBuf};
+use std::{cell::Cell, fmt::Display, path::Path};
 
 use base::{source_file::SourceFile, Handler, Result};
 use compile::compiler::Compiler;
 use shulkerbox::{util::compile::CompileOptions, virtual_fs::VFolder};
+use syntax::syntax_tree::program::Program;
 
 use crate::{base::Error, lexical::token_stream::TokenStream, syntax::parser::Parser};
 
+/// Converts the given source code to tokens.
+///
+/// # Errors
+/// - If an error occurs while reading the file.
+pub fn tokenize(path: &Path) -> Result<TokenStream> {
+    let source_file = SourceFile::load(path)?;
+
+    let printer = Printer::new();
+
+    Ok(TokenStream::tokenize(&source_file, &printer))
+}
+
+/// Parses the given source code.
+///
+/// # Errors
+/// - If an error occurs while reading the file.
+/// - If an error occurs while parsing the source code.
+pub fn parse(path: &Path) -> Result<Program> {
+    let source_file = SourceFile::load(path)?;
+
+    let printer = Printer::new();
+
+    let tokens = TokenStream::tokenize(&source_file, &printer);
+
+    if printer.has_printed() {
+        return Err(Error::Other(
+            "An error occurred while tokenizing the source code.",
+        ));
+    }
+
+    let mut parser = Parser::new(&tokens);
+    let program = parser.parse_program(&printer).ok_or(Error::Other(
+        "An error occurred while parsing the source code.",
+    ))?;
+
+    Ok(program)
+}
+
 /// Compiles the given source code.
 ///
 /// # Errors
 /// - If an error occurs while reading the file.
-pub fn compile(path: PathBuf) -> Result<VFolder> {
+pub fn compile(path: &Path) -> Result<VFolder> {
     let source_file = SourceFile::load(path)?;
 
     let printer = Printer::new();
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
index 2d8082e..e9e5fd7 100644
--- a/src/syntax/parser.rs
+++ b/src/syntax/parser.rs
@@ -142,6 +142,20 @@ impl<'a> Parser<'a> {
             close: close_punctuation,
         })
     }
+
+    /// Tries to parse the given function, and if it fails, resets the current index to the
+    /// `current_index` before the function call.
+    pub fn try_parse<T>(&mut self, f: impl FnOnce(&mut Self) -> Option<T>) -> Option<T> {
+        let current_index = self.current_frame.current_index;
+
+        let result = f(self);
+
+        if result.is_none() {
+            self.current_frame.current_index = current_index;
+        }
+
+        result
+    }
 }
 
 /// Represents a result of [`Parser::step_into()`] function.
@@ -409,20 +423,6 @@ impl<'a> Frame<'a> {
             }
         }
     }
-
-    /// Tries to parse the given function, and if it fails, resets the current index to the
-    /// `current_index` before the function call.
-    pub fn try_parse<T>(&mut self, f: impl FnOnce(&mut Self) -> Option<T>) -> Option<T> {
-        let current_index = self.current_index;
-
-        let result = f(self);
-
-        if result.is_none() {
-            self.current_index = current_index;
-        }
-
-        result
-    }
 }
 
 /// Represents the read value of the [`Frame`].
diff --git a/src/syntax/syntax_tree/declaration.rs b/src/syntax/syntax_tree/declaration.rs
index 026b4fc..c3bba94 100644
--- a/src/syntax/syntax_tree/declaration.rs
+++ b/src/syntax/syntax_tree/declaration.rs
@@ -10,7 +10,7 @@ use crate::{
         Handler,
     },
     lexical::{
-        token::{Identifier, Keyword, KeywordKind, Punctuation, Token},
+        token::{Identifier, Keyword, KeywordKind, Punctuation, StringLiteral, Token},
         token_stream::Delimiter,
     },
     syntax::{
@@ -33,12 +33,62 @@ impl SourceElement for Declaration {
         }
     }
 }
+/// Syntax Synopsis:
+///
+/// ``` ebnf
+/// Annotation:
+///     '#[' Identifier ('=' StringLiteral)?
']'
+/// ;
+/// ```
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
+pub struct Annotation {
+    #[get = "pub"]
+    pound_sign: Punctuation,
+    #[get = "pub"]
+    open_bracket: Punctuation,
+    #[get = "pub"]
+    identifier: Identifier,
+    #[get = "pub"]
+    value: Option<(Punctuation, StringLiteral)>,
+    #[get = "pub"]
+    close_bracket: Punctuation,
+}
+
+impl Annotation {
+    /// Dissolves the [`Annotation`] into its components.
+    #[must_use]
+    pub fn dissolve(
+        self,
+    ) -> (
+        Punctuation,
+        Punctuation,
+        Identifier,
+        Option<(Punctuation, StringLiteral)>,
+        Punctuation,
+    ) {
+        (
+            self.pound_sign,
+            self.open_bracket,
+            self.identifier,
+            self.value,
+            self.close_bracket,
+        )
+    }
+}
+impl SourceElement for Annotation {
+    fn span(&self) -> Span {
+        self.pound_sign
+            .span
+            .join(&self.close_bracket.span())
+            .unwrap()
+    }
+}
 
 /// Syntax Synopsis:
 ///
 /// ``` ebnf
 /// Function:
-///     'fn' Identifier '(' ParameterList? ')' Block
+///     Annotation* 'fn' Identifier '(' ParameterList? ')' Block
 ///     ;
 ///
 /// ParameterList:
@@ -47,6 +97,8 @@ impl SourceElement for Declaration {
 /// ```
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)]
 pub struct Function {
+    #[get = "pub"]
+    annotations: Vec<Annotation>,
     #[get = "pub"]
     function_keyword: Keyword,
     #[get = "pub"]
@@ -64,9 +116,11 @@ pub struct Function {
 
 impl Function {
     /// Dissolves the [`Function`] into its components.
    #[must_use]
+    #[allow(clippy::type_complexity)]
     pub fn dissolve(
         self,
     ) -> (
+        Vec<Annotation>,
         Keyword,
         Identifier,
         Punctuation,
@@ -75,6 +129,7 @@ impl Function {
     ) {
         (
+            self.annotations,
             self.function_keyword,
             self.identifier,
             self.open_paren,
@@ -92,6 +147,59 @@ impl SourceElement for Function {
     }
 }
 
 impl<'a> Parser<'a> {
+    pub fn parse_annotation(&mut self, handler: &impl Handler<Error>) -> Option<Annotation> {
+        match self.stop_at_significant() {
+            Reading::Atomic(Token::Punctuation(punctuation)) if punctuation.punctuation == '#' => {
+                // eat the pound sign
+                self.forward();
+
+                // step into the brackets
+                let content = self.step_into(
+                    Delimiter::Bracket,
+                    |parser| {
+                        let identifier = parser.parse_identifier(handler)?;
+
+                        let value = if let Reading::Atomic(Token::Punctuation(punctuation)) =
+                            parser.stop_at_significant()
+                        {
+                            if punctuation.punctuation == '=' {
+                                // eat the equals sign
+                                parser.forward();
+
+                                // parse the string literal
+                                let string_literal = parser
+                                    .next_significant_token()
+                                    .into_token()?
+                                    .into_string_literal()
+                                    .ok()?;
+
+                                Some((punctuation, string_literal))
+                            } else {
+                                None
+                            }
+                        } else {
+                            None
+                        };
+
+                        Some((identifier, value))
+                    },
+                    handler,
+                )?;
+
+                let (identifier, value) = content.tree?;
+
+                Some(Annotation {
+                    pound_sign: punctuation,
+                    open_bracket: content.open,
+                    identifier,
+                    value,
+                    close_bracket: content.close,
+                })
+            }
+            _ => None,
+        }
+    }
+
     pub fn parse_declaration(&mut self, handler: &impl Handler<Error>) -> Option<Declaration> {
         match self.stop_at_significant() {
             Reading::Atomic(Token::Keyword(function_keyword))
@@ -113,6 +221,7 @@ impl<'a> Parser<'a> {
                 let block = self.parse_block(handler)?;
 
                 Some(Declaration::Function(Function {
+                    annotations: Vec::new(),
                     function_keyword,
                     identifier,
                     open_paren: delimited_tree.open,
@@ -122,6 +231,27 @@ impl<'a> Parser<'a> {
                 }))
             }
 
+            // parse annotations
+            Reading::Atomic(Token::Punctuation(punctuation)) if punctuation.punctuation == '#' => {
+                // parse the annotation
+                let mut annotations = Vec::new();
+
+                while let Some(annotation) =
+                    self.try_parse(|parser| parser.parse_annotation(handler))
+                {
+                    annotations.push(annotation);
+                }
+
+                // parse the function
+                self.parse_declaration(handler)
+                    .map(|declaration| match declaration {
+                        Declaration::Function(mut function) => {
+                            function.annotations.extend(annotations);
+                            Declaration::Function(function)
+                        }
+                    })
+            }
+
             unexpected => {
                 // make progress
                 self.forward();