diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cd44d8..c2f270a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Macro strings +- Function parameters/arguments + ### Changed - Option to deduplicate source files during serialization when using `SerdeWrapper` diff --git a/Cargo.toml b/Cargo.toml index f541552..d8334b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,8 @@ mlua = { version = "0.10.2", features = ["lua54", "vendored"], optional = true } path-absolutize = "3.1.1" pathdiff = "0.2.3" serde = { version = "1.0.217", features = ["derive"], optional = true } -shulkerbox = { version = "0.1.0", default-features = false, optional = true } +# shulkerbox = { version = "0.1.0", default-features = false, optional = true } +shulkerbox = { git = "https://github.com/moritz-hoelting/shulkerbox", rev = "76d58c0766518fe5ab2635de60ba40972565a3e0", default-features = false, optional = true } strsim = "0.11.1" strum = { version = "0.27.0", features = ["derive"] } thiserror = "2.0.11" diff --git a/grammar.md b/grammar.md index 0419177..a17cc85 100644 --- a/grammar.md +++ b/grammar.md @@ -12,6 +12,21 @@ Program: Namespace Declaration*; Namespace: 'namespace' StringLiteral; ``` +### StringLiteral +```ebnf +StringLiteral: '"' TEXT '"'; +``` + +### MacroStringLiteral +```ebnf +MacroStringLiteral: '`' ( TEXT | '$(' [a-zA-Z0-9_]+ ')' )* '`'; +``` + +### AnyStringLiteral +```ebnf +AnyStringLiteral: StringLiteral | MacroStringLiteral; +``` + ### Declaration ```ebnf Declaration: FunctionDeclaration | Import | TagDeclaration; @@ -87,7 +102,7 @@ Condition: PrimaryCondition: ConditionalPrefix | ParenthesizedCondition - | StringLiteral + | AnyStringLiteral ; ``` @@ -144,6 +159,8 @@ Expression: ```ebnf Primary: FunctionCall + | AnyStringLiteral + | LuaCode ; ``` diff --git a/src/base/error.rs b/src/base/error.rs index ec14dba..7a61661 100644 --- a/src/base/error.rs +++ 
b/src/base/error.rs @@ -9,6 +9,8 @@ pub enum Error { #[error(transparent)] ParseError(#[from] crate::syntax::error::Error), #[error(transparent)] + SemanticError(#[from] crate::semantic::error::Error), + #[error(transparent)] TranspileError(#[from] crate::transpile::TranspileError), #[error("An error occurred: {0}")] Other(String), diff --git a/src/base/source_file.rs b/src/base/source_file.rs index e4ab874..3f98dce 100644 --- a/src/base/source_file.rs +++ b/src/base/source_file.rs @@ -254,6 +254,26 @@ impl Span { }) } + /// Create a span from the given start byte index to the end of the source file with an offset. + #[must_use] + pub fn to_end_with_offset( + source_file: Arc, + start: usize, + end_offset: isize, + ) -> Option { + if !source_file.content().is_char_boundary(start) { + return None; + } + Some(Self { + start, + end: source_file + .content() + .len() + .saturating_add_signed(end_offset), + source_file, + }) + } + /// Get the string slice of the source code that the span represents. #[must_use] pub fn str(&self) -> &str { diff --git a/src/lexical/token.rs b/src/lexical/token.rs index e04f7cf..54bcb84 100644 --- a/src/lexical/token.rs +++ b/src/lexical/token.rs @@ -4,6 +4,7 @@ use std::{borrow::Cow, collections::HashMap, fmt::Display, str::FromStr, sync::O use crate::base::{ self, + log::SourceCodeDisplay, source_file::{SourceElement, SourceIterator, Span}, Handler, }; @@ -145,24 +146,7 @@ pub enum Token { DocComment(DocComment), CommandLiteral(CommandLiteral), StringLiteral(StringLiteral), -} - -impl Token { - /// Returns the span of the token. 
- #[must_use] - pub fn span(&self) -> &Span { - match self { - Self::WhiteSpaces(token) => &token.span, - Self::Identifier(token) => &token.span, - Self::Keyword(token) => &token.span, - Self::Punctuation(token) => &token.span, - Self::Numeric(token) => &token.span, - Self::Comment(token) => &token.span, - Self::DocComment(token) => &token.span, - Self::CommandLiteral(token) => &token.span, - Self::StringLiteral(token) => &token.span, - } - } + MacroStringLiteral(MacroStringLiteral), } impl SourceElement for Token { @@ -177,6 +161,7 @@ impl SourceElement for Token { Self::DocComment(token) => token.span(), Self::CommandLiteral(token) => token.span(), Self::StringLiteral(token) => token.span(), + Self::MacroStringLiteral(token) => token.span(), } } } @@ -290,6 +275,76 @@ impl SourceElement for StringLiteral { } } +/// Represents a hardcoded macro string literal value in the source code. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MacroStringLiteral { + /// The backtick that starts the macro string literal. + starting_backtick: Punctuation, + /// The parts that make up the macro string literal. + parts: Vec, + /// The backtick that ends the macro string literal. + ending_backtick: Punctuation, +} + +impl MacroStringLiteral { + /// Returns the string content without escapement characters, leading and trailing double quotes. + #[cfg(feature = "shulkerbox")] + #[must_use] + pub fn str_content(&self) -> String { + use std::fmt::Write; + + let mut content = String::new(); + + for part in &self.parts { + match part { + MacroStringLiteralPart::Text(span) => { + content += &crate::util::unescape_macro_string(span.str()); + } + MacroStringLiteralPart::MacroUsage { identifier, .. 
} => { + write!( + content, + "$({})", + crate::transpile::util::identifier_to_macro(identifier.span.str()) + ) + .expect("can always write to string"); + } + } + } + + content + } + + /// Returns the parts that make up the macro string literal. + #[must_use] + pub fn parts(&self) -> &[MacroStringLiteralPart] { + &self.parts + } +} + +impl SourceElement for MacroStringLiteral { + fn span(&self) -> Span { + self.starting_backtick + .span + .join(&self.ending_backtick.span) + .expect("Invalid macro string literal span") + } +} + +/// Represents a part of a macro string literal value in the source code. +#[allow(missing_docs)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum MacroStringLiteralPart { + Text(Span), + MacroUsage { + dollar: Punctuation, + open_brace: Punctuation, + identifier: Identifier, + close_brace: Punctuation, + }, +} + /// Is an enumeration representing the two kinds of comments in the Shulkerscript programming language. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -362,7 +417,7 @@ impl CommandLiteral { } /// Is an error that can occur when invoking the [`Token::tokenize`] method. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)]
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error)]
 #[allow(missing_docs)]
 pub enum TokenizeError {
     #[error("encountered a fatal lexical error that causes the process to stop.")]
@@ -370,8 +425,95 @@ pub enum TokenizeError {
 
     #[error("the iterator argument is at the end of the source code.")]
     EndOfSourceCodeIteratorArgument,
+
+    #[error(transparent)]
+    InvalidMacroNameCharacter(#[from] InvalidMacroNameCharacter),
+
+    #[error(transparent)]
+    UnclosedMacroUsage(#[from] UnclosedMacroUsage),
+
+    #[error(transparent)]
+    EmptyMacroUsage(#[from] EmptyMacroUsage),
 }
 
+/// Is an error that can occur when the macro name contains invalid characters.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct InvalidMacroNameCharacter {
+    /// The span of the invalid characters.
+    pub span: Span,
+}
+
+impl Display for InvalidMacroNameCharacter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            base::log::Message::new(base::log::Severity::Error, format!("The macro name contains invalid characters: `{}`. Only alphanumeric characters and underscores are allowed.", self.span.str()))
+        )?;
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.span, Option::<u8>::None)
+        )
+    }
+}
+
+impl std::error::Error for InvalidMacroNameCharacter {}
+
+/// Is an error that can occur when a macro usage is opened with `$(` but never closed.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct UnclosedMacroUsage {
+    /// The span of the unclosed macro usage.
+    pub span: Span,
+}
+
+impl Display for UnclosedMacroUsage {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            base::log::Message::new(
+                base::log::Severity::Error,
+                "A macro usage was opened with `$(` but never closed."
+            )
+        )?;
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.span, Option::<u8>::None)
+        )
+    }
+}
+
+impl std::error::Error for UnclosedMacroUsage {}
+
+/// Is an error that can occur when a macro usage contains no name, i.e. `$()`.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct EmptyMacroUsage {
+    /// The span of the empty macro usage.
+    pub span: Span,
+}
+
+impl Display for EmptyMacroUsage {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            base::log::Message::new(
+                base::log::Severity::Error,
+                "A macro usage was opened with `$(` but closed immediately with `)`."
+            )
+        )?;
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.span, Option::<u8>::None)
+        )
+    }
+}
+
+impl std::error::Error for EmptyMacroUsage {}
+
 impl Token {
     /// Increments the iterator while the predicate returns true.
     pub fn walk_iter(iter: &mut SourceIterator, predicate: impl Fn(char) -> bool) {
@@ -385,6 +527,7 @@ impl Token {
     }
 
     /// Creates a span from the given start location to the current location of the iterator.
+    #[must_use]
     fn create_span(start: usize, iter: &mut SourceIterator) -> Span {
         iter.peek().map_or_else(
             || Span::to_end(iter.source_file().clone(), start).unwrap(),
@@ -392,6 +535,26 @@ impl Token {
         )
     }
 
+    /// Creates a span from `start` to the iterator's current location (or the end of the file) shifted by `end_offset`.
+    #[must_use]
+    fn create_span_with_end_offset(
+        start: usize,
+        iter: &mut SourceIterator,
+        end_offset: isize,
+    ) -> Span {
+        iter.peek().map_or_else(
+            || Span::to_end_with_offset(iter.source_file().clone(), start, end_offset).unwrap(),
+            |(index, _)| {
+                Span::new(
+                    iter.source_file().clone(),
+                    start,
+                    index.saturating_add_signed(end_offset),
+                )
+                .unwrap()
+            },
+        )
+    }
+
     /// Checks if the given character is a valid first character of an identifier.
fn is_first_identifier_character(character: char) -> bool { character == '_' @@ -551,6 +714,113 @@ impl Token { .into() } + /// Handles a sequence of characters that are enclosed in backticks and contain macro usages + fn handle_macro_string_literal( + iter: &mut SourceIterator, + mut start: usize, + ) -> Result { + let mut is_escaped = false; + let mut is_inside_macro = false; + let mut encountered_open_parenthesis = false; + let starting_backtick = Punctuation { + span: Self::create_span(start, iter), + punctuation: '`', + }; + start += 1; + let mut parts = Vec::new(); + + while iter.peek().is_some() { + let (index, character) = iter.next().unwrap(); + + #[expect(clippy::collapsible_else_if)] + if is_inside_macro { + if character == ')' { + // Check if the macro usage is empty + if start + 2 == index { + return Err(EmptyMacroUsage { + span: Span::new(iter.source_file().clone(), start, index + 1).unwrap(), + } + .into()); + } + parts.push(MacroStringLiteralPart::MacroUsage { + dollar: Punctuation { + span: Span::new(iter.source_file().clone(), start, start + 1).unwrap(), + punctuation: '$', + }, + open_brace: Punctuation { + span: Span::new(iter.source_file().clone(), start + 1, start + 2) + .unwrap(), + punctuation: '(', + }, + identifier: Identifier { + span: Self::create_span_with_end_offset(start + 2, iter, -1), + }, + close_brace: Punctuation { + span: Span::new(iter.source_file().clone(), index, index + 1).unwrap(), + punctuation: ')', + }, + }); + start = index + 1; + is_inside_macro = false; + } else if !encountered_open_parenthesis && character == '(' { + encountered_open_parenthesis = true; + } else if encountered_open_parenthesis && !Self::is_identifier_character(character) + { + if character == '`' { + return Err(UnclosedMacroUsage { + span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(), + } + .into()); + } + + Self::walk_iter(iter, |c| c != ')' && !Self::is_identifier_character(c)); + return Err(InvalidMacroNameCharacter { + span: 
Self::create_span(index, iter), + } + .into()); + } + } else { + if character == '$' && iter.peek().is_some_and(|(_, c)| c == '(') { + parts.push(MacroStringLiteralPart::Text( + Self::create_span_with_end_offset(start, iter, -1), + )); + start = index; + is_inside_macro = true; + encountered_open_parenthesis = false; + } else if character == '\\' { + is_escaped = !is_escaped; + } else if character == '`' && !is_escaped { + if start != index { + parts.push(MacroStringLiteralPart::Text( + Self::create_span_with_end_offset(start, iter, -1), + )); + } + start = index; + break; + } else { + is_escaped = false; + } + } + } + + if is_inside_macro { + Err(UnclosedMacroUsage { + span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(), + } + .into()) + } else { + Ok(MacroStringLiteral { + starting_backtick, + parts, + ending_backtick: Punctuation { + span: Self::create_span(start, iter), + punctuation: '`', + }, + } + .into()) + } + } + /// Handles a command that is preceeded by a slash fn handle_command_literal(iter: &mut SourceIterator, start: usize) -> Self { Self::walk_iter(iter, |c| !(c.is_whitespace() && c.is_ascii_control())); @@ -592,9 +862,15 @@ impl Token { // Found comment/single slash punctuation else if character == '/' { Self::handle_comment(iter, start, character, prev_token, handler) - } else if character == '"' { + } + // Found string literal + else if character == '"' { Ok(Self::handle_string_literal(iter, start)) } + // Found macro string literal + else if character == '`' { + Self::handle_macro_string_literal(iter, start) + } // Found numeric literal else if character.is_ascii_digit() { Ok(Self::handle_numeric_literal(iter, start)) diff --git a/src/lexical/token_stream.rs b/src/lexical/token_stream.rs index 1e3fb99..9502e4a 100644 --- a/src/lexical/token_stream.rs +++ b/src/lexical/token_stream.rs @@ -5,10 +5,13 @@ use std::{fmt::Debug, sync::Arc}; use derive_more::{Deref, From}; use enum_as_inner::EnumAsInner; -use crate::base::{ - self, - 
source_file::{SourceElement, SourceFile, Span}, - Handler, +use crate::{ + base::{ + self, + source_file::{SourceElement, SourceFile, Span}, + Handler, + }, + lexical::Error, }; use super::{ @@ -62,6 +65,17 @@ impl TokenStream { Err(TokenizeError::FatalLexicalError) => { tracing::error!("Fatal lexical error encountered while tokenizing source code"); } + Err(TokenizeError::InvalidMacroNameCharacter(err)) => { + handler.receive(Error::TokenizeError( + TokenizeError::InvalidMacroNameCharacter(err), + )); + } + Err(TokenizeError::UnclosedMacroUsage(err)) => { + handler.receive(Error::TokenizeError(TokenizeError::UnclosedMacroUsage(err))); + } + Err(TokenizeError::EmptyMacroUsage(err)) => { + handler.receive(Error::TokenizeError(TokenizeError::EmptyMacroUsage(err))); + } } } @@ -184,7 +198,7 @@ pub enum TokenTree { impl SourceElement for TokenTree { fn span(&self) -> Span { match self { - Self::Token(token) => token.span().to_owned(), + Self::Token(token) => token.span(), Self::Delimited(delimited) => delimited .open .span() diff --git a/src/lib.rs b/src/lib.rs index 463ec98..f73b2cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,8 +17,10 @@ pub use shulkerbox; pub mod base; pub mod lexical; +pub mod semantic; pub mod syntax; pub mod transpile; +pub mod util; #[cfg(feature = "serde")] pub(crate) mod serde; @@ -106,6 +108,8 @@ pub fn parse( )); } + program.analyze_semantics(handler)?; + Ok(program) } diff --git a/src/semantic/error.rs b/src/semantic/error.rs new file mode 100644 index 0000000..9be7db1 --- /dev/null +++ b/src/semantic/error.rs @@ -0,0 +1,286 @@ +//! Error types for the semantic analysis phase of the compiler. 
+
+#![allow(missing_docs)]
+
+use std::{collections::HashSet, fmt::Display};
+
+use getset::Getters;
+use itertools::Itertools as _;
+
+use crate::{
+    base::{
+        log::{Message, Severity, SourceCodeDisplay},
+        source_file::{SourceElement as _, Span},
+    },
+    lexical::token::StringLiteral,
+    syntax::syntax_tree::expression::Expression,
+};
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, thiserror::Error)]
+#[allow(missing_docs)] // NOTE(review): already covered by the module-level `#![allow(missing_docs)]`
+pub enum Error {
+    #[error(transparent)]
+    MissingFunctionDeclaration(#[from] MissingFunctionDeclaration),
+    #[error(transparent)]
+    UnexpectedExpression(#[from] UnexpectedExpression),
+    #[error(transparent)]
+    ConflictingFunctionNames(#[from] ConflictingFunctionNames),
+    #[error(transparent)]
+    InvalidNamespaceName(#[from] InvalidNamespaceName),
+    #[error(transparent)]
+    UnresolvedMacroUsage(#[from] UnresolvedMacroUsage),
+    #[error(transparent)]
+    IncompatibleFunctionAnnotation(#[from] IncompatibleFunctionAnnotation),
+}
+
+/// An error that occurs when a function is invoked but no matching declaration exists.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Getters)]
+pub struct MissingFunctionDeclaration {
+    #[get = "pub"]
+    span: Span,
+    #[get = "pub"]
+    alternatives: Vec<String>,
+}
+
+impl MissingFunctionDeclaration {
+    pub(super) fn from_context(identifier_span: Span, functions: &HashSet<String>) -> Self {
+        let own_name = identifier_span.str();
+        let alternatives = functions
+            .iter()
+            .filter_map(|function_name| {
+                // `normalized_damerau_levenshtein` is a similarity score: 1.0 means identical.
+                let score = strsim::normalized_damerau_levenshtein(own_name, function_name);
+                (score > 0.8 || strsim::damerau_levenshtein(own_name, function_name) < 3)
+                    .then_some((score, function_name))
+            })
+            // Most similar first, so `take(8)` keeps the best matches; ties are ordered by name.
+            .sorted_by(|a, b| b.0.total_cmp(&a.0).then_with(|| a.1.cmp(b.1)))
+            .map(|(_, data)| data)
+            .take(8)
+            .cloned()
+            .collect::<Vec<_>>();
+
+        Self {
+            alternatives,
+            span: identifier_span,
+        }
+    }
+}
+
+impl Display for MissingFunctionDeclaration {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use std::fmt::Write;
+
+        let message = format!(
+            "no matching function declaration found for invocation of function `{}`",
+            self.span.str()
+        );
+        write!(f, "{}", Message::new(Severity::Error, message))?;
+
+        let help_message = if self.alternatives.is_empty() {
+            None
+        } else {
+            let mut message = String::from("did you mean ");
+            for (i, alternative) in self.alternatives.iter().enumerate() {
+                if i > 0 {
+                    message.push_str(", ");
+                }
+                write!(message, "`{alternative}`")?;
+            }
+            Some(message + "?")
+        };
+
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.span, help_message.as_ref())
+        )
+    }
+}
+
+impl std::error::Error for MissingFunctionDeclaration {}
+
+/// An error that occurs when an unexpected expression is encountered.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct UnexpectedExpression(pub Expression);
+
+impl Display for UnexpectedExpression {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            Message::new(Severity::Error, "encountered unexpected expression")
+        )?;
+
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.0.span(), Option::<u8>::None)
+        )
+    }
+}
+
+impl std::error::Error for UnexpectedExpression {}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ConflictingFunctionNames {
+    pub definition: Span,
+    pub name: String,
+}
+
+impl Display for ConflictingFunctionNames {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            Message::new(
+                Severity::Error,
+                format!("the following function declaration conflicts with an existing function with name `{}`", self.name)
+            )
+        )?;
+
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.definition, Option::<u8>::None)
+        )
+    }
+}
+
+impl std::error::Error for ConflictingFunctionNames {}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct InvalidNamespaceName {
+    pub name: StringLiteral,
+    pub invalid_chars: String,
+}
+
+impl Display for InvalidNamespaceName {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            Message::new(
+                Severity::Error,
+                format!(
+                    "Invalid characters in namespace `{}`. The following characters are not allowed in namespace definitions: `{}`",
+                    self.name.str_content(),
+                    self.invalid_chars
+                )
+            )
+        )?;
+
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.name.span, Option::<u8>::None)
+        )
+    }
+}
+
+impl std::error::Error for InvalidNamespaceName {}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct UnresolvedMacroUsage {
+    pub span: Span,
+}
+
+impl Display for UnresolvedMacroUsage {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            Message::new(
+                Severity::Error,
+                format!(
+                    "Macro `{}` was used, but could not be resolved.",
+                    self.span.str(),
+                )
+            )
+        )?;
+
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(
+                &self.span,
+                Some(format!(
+                    "You might want to add `{}` to the function parameters.",
+                    self.span.str()
+                ))
+            )
+        )
+    }
+}
+
+impl std::error::Error for UnresolvedMacroUsage {}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct IncompatibleFunctionAnnotation {
+    pub span: Span,
+    pub reason: String,
+}
+
+impl Display for IncompatibleFunctionAnnotation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            Message::new(
+                Severity::Error,
+                format!(
+                    "Annotation `{}` cannot be used here, because {}.",
+                    self.span.str(),
+                    self.reason
+                )
+            )
+        )?;
+
+        write!(f, "\n{}", SourceCodeDisplay::new(&self.span, None::<u8>))
+    }
+}
+
+impl std::error::Error for IncompatibleFunctionAnnotation {}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct InvalidFunctionArguments {
+    pub span: Span,
+    pub expected: usize,
+    pub actual: usize,
+}
+
+impl Display for InvalidFunctionArguments {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            Message::new(
+                Severity::Error,
+                format!(
+                    "Expected {} arguments, but got {}.",
+                    self.expected, self.actual
+                )
+            )
+        )?;
+
+        let help_message = if self.expected > self.actual {
+            format!(
+                "You might want to add {} more arguments.",
+                self.expected - self.actual
+            )
+        } else {
+            format!(
+                "You might want to remove {} arguments.",
+                self.actual - self.expected
+            )
+        };
+
+        write!(
+            f,
+            "\n{}",
+            SourceCodeDisplay::new(&self.span, Some(help_message))
+        )
+    }
+}
+
+impl std::error::Error for InvalidFunctionArguments {}
diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs
new file mode 100644
index 0000000..aa1e7d0
--- /dev/null
+++ b/src/semantic/mod.rs
@@ -0,0 +1,574 @@
+//! This module contains the semantic analysis of the AST.
+
+#![allow(clippy::missing_errors_doc)]
+
+use std::collections::HashSet;
+
+use error::{
+    IncompatibleFunctionAnnotation, InvalidNamespaceName, MissingFunctionDeclaration,
+    UnexpectedExpression, UnresolvedMacroUsage,
+};
+
+use crate::{
+    base::{self, source_file::SourceElement as _, Handler},
+    lexical::token::{MacroStringLiteral, MacroStringLiteralPart},
+    syntax::syntax_tree::{
+        condition::{
+            BinaryCondition, Condition, ParenthesizedCondition, PrimaryCondition, UnaryCondition,
+        },
+        declaration::{Declaration, Function, ImportItems},
+        expression::{Expression, FunctionCall, Primary},
+        program::{Namespace, ProgramFile},
+        statement::{
+            execute_block::{
+                Conditional, Else, ExecuteBlock, ExecuteBlockHead, ExecuteBlockHeadItem as _,
+                ExecuteBlockTail,
+            },
+            Block, Grouping, Run, Semicolon, Statement,
+        },
+        AnyStringLiteral,
+    },
+};
+
+pub mod error;
+
+impl ProgramFile {
+    /// Analyzes the semantics of the program.
+    pub fn analyze_semantics(
+        &self,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        self.namespace().analyze_semantics(handler)?;
+
+        let mut errs = Vec::new();
+        let function_names = extract_all_function_names(self.declarations(), handler)?;
+
+        for declaration in self.declarations() {
+            if let Err(err) = declaration.analyze_semantics(&function_names, handler) {
+                errs.push(err);
+            }
+        }
+
+        #[expect(clippy::option_if_let_else)]
+        if let Some(err) = errs.first() {
+            Err(err.clone())
+        } else {
+            Ok(())
+        }
+    }
+}
+
+/// Collects the names of all declared and explicitly imported functions.
+///
+/// A name that was already seen is reported to `handler` as a conflict; collection still
+/// continues, and the first such error is returned once all declarations were visited.
+/// An import of all items is only reported to `handler` and does not make this function fail.
+fn extract_all_function_names(
+    declarations: &[Declaration],
+    handler: &impl Handler<base::Error>,
+) -> Result<HashSet<String>, error::Error> {
+    let mut function_names = HashSet::new();
+    let mut errs = Vec::new();
+
+    for declaration in declarations {
+        match declaration {
+            Declaration::Function(func) => {
+                let name = func.identifier();
+                if function_names.contains(name.span.str()) {
+                    let err = error::Error::from(error::ConflictingFunctionNames {
+                        name: name.span.str().to_string(),
+                        definition: name.span(),
+                    });
+                    handler.receive(err.clone());
+                    errs.push(err);
+                }
+                function_names.insert(name.span.str().to_string());
+            }
+
+            Declaration::Import(imp) => match imp.items() {
+                ImportItems::All(_) => {
+                    handler.receive(base::Error::Other(
+                        "Importing all items is not yet supported.".to_string(),
+                    ));
+                }
+                ImportItems::Named(items) => {
+                    for item in items.elements() {
+                        if function_names.contains(item.span.str()) {
+                            let err = error::Error::from(error::ConflictingFunctionNames {
+                                name: item.span.str().to_string(),
+                                definition: item.span(),
+                            });
+                            handler.receive(err.clone());
+                            errs.push(err);
+                        }
+                        function_names.insert(item.span.str().to_string());
+                    }
+                }
+            },
+
+            Declaration::Tag(_) => {}
+        }
+    }
+
+    errs.into_iter().next().map_or(Ok(function_names), Err)
+}
+
+impl Namespace {
+    /// Analyzes the semantics of the namespace.
+    pub fn analyze_semantics(
+        &self,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        let name = self.namespace_name();
+        Self::validate_str(name.str_content().as_ref()).map_err(|invalid_chars| {
+            let err = error::Error::from(InvalidNamespaceName {
+                name: name.clone(),
+                invalid_chars,
+            });
+            handler.receive(err.clone());
+            err
+        })
+    }
+}
+
+impl Declaration {
+    /// Analyzes the semantics of the declaration; only function declarations are checked here.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        match self {
+            Self::Function(func) => func.analyze_semantics(function_names, handler),
+            Self::Import(_) | Self::Tag(_) => Ok(()),
+        }
+    }
+}
+
+impl Function {
+    /// Rejects parameters on `tick`/`load` functions, then analyzes the body with the parameters as macro names.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        let macro_names = if let Some(parameters) = self.parameters() {
+            if let Some(incompatible) = self
+                .annotations()
+                .iter()
+                .find(|a| ["tick", "load"].contains(&a.identifier().span.str()))
+            {
+                let err =
+                    error::Error::IncompatibleFunctionAnnotation(IncompatibleFunctionAnnotation {
+                        span: incompatible.identifier().span(),
+                        reason:
+                            "functions with the `tick` or `load` annotation cannot have parameters"
+                                .to_string(),
+                    });
+                handler.receive(err.clone());
+                return Err(err);
+            }
+
+            parameters
+                .elements()
+                .map(|el| el.span.str().to_string())
+                .collect()
+        } else {
+            HashSet::new()
+        };
+
+        self.block()
+            .analyze_semantics(function_names, &macro_names, handler)
+    }
+}
+
+impl Block {
+    /// Analyzes the semantics of a block.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        let mut errs = Vec::new();
+        for statement in &self.statements {
+            if let Err(err) = match statement {
+                Statement::Block(block) => {
+                    block.analyze_semantics(function_names, macro_names, handler)
+                }
+                Statement::DocComment(_) | Statement::LiteralCommand(_) => Ok(()),
+                Statement::ExecuteBlock(ex) => {
+                    ex.analyze_semantics(function_names, macro_names, handler)
+                }
+                Statement::Grouping(group) => {
+                    group.analyze_semantics(function_names, macro_names, handler)
+                }
+                Statement::Run(run) => run.analyze_semantics(function_names, macro_names, handler),
+                Statement::Semicolon(sem) => {
+                    sem.analyze_semantics(function_names, macro_names, handler)
+                }
+            } {
+                errs.push(err);
+            };
+        }
+
+        #[expect(clippy::option_if_let_else)]
+        if let Some(err) = errs.first() {
+            Err(err.clone())
+        } else {
+            Ok(())
+        }
+    }
+}
+
+impl ExecuteBlock {
+    /// Analyzes every part of the execute block and returns the first error (head/condition before the rest).
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        match self {
+            Self::HeadTail(head, tail) => {
+                let head_res = head.analyze_semantics(function_names, macro_names, handler);
+                let tail_res = tail.analyze_semantics(function_names, macro_names, handler);
+
+                if head_res.is_err() {
+                    head_res
+                } else {
+                    tail_res
+                }
+            }
+            Self::IfElse(cond, then, el) => {
+                let cond_res = cond.analyze_semantics(function_names, macro_names, handler);
+                let then_res = then.analyze_semantics(function_names, macro_names, handler);
+                let else_res = el.analyze_semantics(function_names, macro_names, handler);
+
+                if cond_res.is_err() {
+                    cond_res
+                } else if then_res.is_err() {
+                    then_res
+                } else {
+                    else_res
+                }
+            }
+        }
+    }
+}
+
+impl Grouping {
+    /// Analyzes the semantics of the grouping.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        self.block()
+            .analyze_semantics(function_names, macro_names, handler)
+    }
+}
+
+impl Run {
+    /// Analyzes the semantics of the run statement by analyzing its expression.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        self.expression()
+            .analyze_semantics(function_names, macro_names, handler)
+    }
+}
+
+impl Semicolon {
+    /// Analyzes the semicolon statement; any expression other than a function call is reported as unexpected.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        match self.expression() {
+            Expression::Primary(Primary::FunctionCall(func)) => {
+                func.analyze_semantics(function_names, macro_names, handler)
+            }
+            Expression::Primary(unexpected) => {
+                let error = error::Error::UnexpectedExpression(UnexpectedExpression(
+                    Expression::Primary(unexpected.clone()),
+                ));
+                handler.receive(error.clone());
+                Err(error)
+            }
+        }
+    }
+}
+
+impl ExecuteBlockHead {
+    /// Analyzes the semantics of the execute block head.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        match self {
+            Self::Align(align) => align.analyze_semantics(macro_names, handler),
+            Self::Anchored(anchored) => anchored.analyze_semantics(macro_names, handler),
+            Self::As(r#as) => r#as.analyze_semantics(macro_names, handler),
+            Self::At(at) => at.analyze_semantics(macro_names, handler),
+            Self::AsAt(asat) => asat.analyze_semantics(macro_names, handler),
+            Self::Conditional(cond) => cond.analyze_semantics(function_names, macro_names, handler),
+            Self::Facing(facing) => facing.analyze_semantics(macro_names, handler),
+            Self::In(r#in) => r#in.analyze_semantics(macro_names, handler),
+            Self::On(on) => on.analyze_semantics(macro_names, handler),
+            Self::Positioned(pos) => pos.analyze_semantics(macro_names, handler),
+            Self::Rotated(rot) => rot.analyze_semantics(macro_names, handler),
+            Self::Store(store) => store.analyze_semantics(macro_names, handler),
+            Self::Summon(summon) => summon.analyze_semantics(macro_names, handler),
+        }
+    }
+}
+
+impl ExecuteBlockTail {
+    /// Analyzes the semantics of the execute block tail, i.e. its block or nested execute block.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        match self {
+            Self::Block(block) => block.analyze_semantics(function_names, macro_names, handler),
+            Self::ExecuteBlock(_, ex) => ex.analyze_semantics(function_names, macro_names, handler),
+        }
+    }
+}
+
+impl Conditional {
+    /// Analyzes the semantics of the conditional by analyzing its condition.
+    pub fn analyze_semantics(
+        &self,
+        function_names: &HashSet<String>,
+        macro_names: &HashSet<String>,
+        handler: &impl Handler<base::Error>,
+    ) -> Result<(), error::Error> {
+        self.condition()
+            .analyze_semantics(function_names, macro_names, handler)
+    }
+}
+
+impl ParenthesizedCondition {
+    /// Analyzes the semantics of the parenthesized condition.
+ pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + self.condition + .analyze_semantics(function_names, macro_names, handler) + } +} + +impl Condition { + /// Analyzes the semantics of the condition. + pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + match self { + Self::Primary(prim) => prim.analyze_semantics(function_names, macro_names, handler), + Self::Binary(bin) => bin.analyze_semantics(function_names, macro_names, handler), + } + } +} + +impl Else { + /// Analyzes the semantics of the else block. + pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + self.block() + .analyze_semantics(function_names, macro_names, handler) + } +} + +impl MacroStringLiteral { + /// Analyzes the semantics of the macro string literal. + pub fn analyze_semantics( + &self, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + let mut errors = Vec::new(); + for part in self.parts() { + if let MacroStringLiteralPart::MacroUsage { identifier, .. } = part { + if !macro_names.contains(identifier.span.str()) { + let err = error::Error::UnresolvedMacroUsage(UnresolvedMacroUsage { + span: identifier.span(), + }); + handler.receive(err.clone()); + errors.push(err); + } + } + } + + #[expect(clippy::option_if_let_else)] + if let Some(err) = errors.first() { + Err(err.clone()) + } else { + Ok(()) + } + } +} + +impl Expression { + /// Analyzes the semantics of an expression. 
+ pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + match self { + Self::Primary(prim) => prim.analyze_semantics(function_names, macro_names, handler), + } + } +} + +impl Primary { + /// Analyzes the semantics of a primary expression. + pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + match self { + Self::FunctionCall(func) => { + func.analyze_semantics(function_names, macro_names, handler) + } + Self::Lua(_) | Self::StringLiteral(_) => Ok(()), + Self::MacroStringLiteral(literal) => literal.analyze_semantics(macro_names, handler), + } + } +} + +impl FunctionCall { + /// Analyzes the semantics of a function call. + pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + let mut errors = Vec::new(); + + if !function_names.contains(self.identifier().span.str()) { + let err = error::Error::MissingFunctionDeclaration( + MissingFunctionDeclaration::from_context(self.identifier().span(), function_names), + ); + handler.receive(err.clone()); + errors.push(err); + } + + for expression in self + .arguments() + .iter() + .flat_map(super::syntax::syntax_tree::ConnectedList::elements) + { + if let Err(err) = expression.analyze_semantics(function_names, macro_names, handler) { + handler.receive(err.clone()); + errors.push(err); + } + } + + #[expect(clippy::option_if_let_else)] + if let Some(err) = errors.first() { + Err(err.clone()) + } else { + Ok(()) + } + } +} + +impl AnyStringLiteral { + /// Analyzes the semantics of any string literal. 
+ pub fn analyze_semantics( + &self, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + match self { + Self::StringLiteral(_) => Ok(()), + Self::MacroStringLiteral(literal) => literal.analyze_semantics(macro_names, handler), + } + } +} + +impl PrimaryCondition { + /// Analyzes the semantics of a primary condition. + pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + match self { + Self::Parenthesized(paren) => { + paren.analyze_semantics(function_names, macro_names, handler) + } + Self::StringLiteral(_) => Ok(()), + Self::Unary(unary) => unary.analyze_semantics(function_names, macro_names, handler), + } + } +} + +impl UnaryCondition { + /// Analyzes the semantics of an unary condition. + pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + self.operand() + .analyze_semantics(function_names, macro_names, handler) + } +} + +impl BinaryCondition { + /// Analyzes the semantics of a binary condition. 
+ pub fn analyze_semantics( + &self, + function_names: &HashSet, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), error::Error> { + let a = self + .left_operand() + .analyze_semantics(function_names, macro_names, handler) + .inspect_err(|err| { + handler.receive(err.clone()); + }); + let b = self + .right_operand() + .analyze_semantics(function_names, macro_names, handler) + .inspect_err(|err| { + handler.receive(err.clone()); + }); + if a.is_err() { + a + } else { + b + } + } +} diff --git a/src/syntax/error.rs b/src/syntax/error.rs index 4f543ec..a69883a 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -5,7 +5,7 @@ use std::fmt::Display; use crate::{ base::{ log::{Message, Severity, SourceCodeDisplay}, - source_file::Span, + source_file::{SourceElement as _, Span}, }, lexical::token::{KeywordKind, Token}, }; @@ -34,6 +34,8 @@ pub enum SyntaxKind { Declaration, Numeric, StringLiteral, + MacroStringLiteral, + AnyStringLiteral, Statement, Expression, Type, @@ -69,6 +71,8 @@ impl SyntaxKind { Self::Declaration => "a declaration token".to_string(), Self::Numeric => "a numeric token".to_string(), Self::StringLiteral => "a string literal".to_string(), + Self::MacroStringLiteral => "a macro string literal".to_string(), + Self::AnyStringLiteral => "a (macro) string literal".to_string(), Self::Statement => "a statement syntax".to_string(), Self::Expression => "an expression syntax".to_string(), Self::Type => "a type syntax".to_string(), @@ -105,6 +109,7 @@ impl Display for UnexpectedSyntax { Some(Token::Numeric(..)) => "a numeric token".to_string(), Some(Token::CommandLiteral(..)) => "a literal command token".to_string(), Some(Token::StringLiteral(..)) => "a string literal token".to_string(), + Some(Token::MacroStringLiteral(..)) => "a macro string literal token".to_string(), None => "EOF".to_string(), }; @@ -117,7 +122,7 @@ impl Display for UnexpectedSyntax { write!( f, "\n{}", - SourceCodeDisplay::new(span.span(), Option::::None) + 
SourceCodeDisplay::new(&span.span(), Option::::None) ) }) } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index b2e14b4..bf2460d 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -6,12 +6,18 @@ use enum_as_inner::EnumAsInner; use crate::{ base::{self, Handler}, lexical::{ - token::{Identifier, Keyword, KeywordKind, Numeric, Punctuation, StringLiteral, Token}, + token::{ + Identifier, Keyword, KeywordKind, MacroStringLiteral, Numeric, Punctuation, + StringLiteral, Token, + }, token_stream::{Delimited, Delimiter, TokenStream, TokenTree}, }, }; -use super::error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}; +use super::{ + error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}, + syntax_tree::AnyStringLiteral, +}; /// Represents a parser that reads a token stream and constructs an abstract syntax tree. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut)] @@ -432,6 +438,49 @@ impl<'a> Frame<'a> { } } + /// Expects the next [`Token`] to be an [`MacroStringLiteral`], and returns it. + /// + /// # Errors + /// If the next [`Token`] is not an [`MacroStringLiteral`]. + pub fn parse_macro_string_literal( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + match self.next_significant_token() { + Reading::Atomic(Token::MacroStringLiteral(literal)) => Ok(literal), + found => { + let err = Error::UnexpectedSyntax(UnexpectedSyntax { + expected: SyntaxKind::MacroStringLiteral, + found: found.into_token(), + }); + handler.receive(err.clone()); + Err(err) + } + } + } + + /// Expects the next [`Token`] to be an [`AnyStringLiteral`], and returns it. + /// + /// # Errors + /// If the next [`Token`] is not an [`AnyStringLiteral`]. 
+ pub fn parse_any_string_literal( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + match self.next_significant_token() { + Reading::Atomic(Token::StringLiteral(literal)) => Ok(literal.into()), + Reading::Atomic(Token::MacroStringLiteral(literal)) => Ok(literal.into()), + found => { + let err = Error::UnexpectedSyntax(UnexpectedSyntax { + expected: SyntaxKind::AnyStringLiteral, + found: found.into_token(), + }); + handler.receive(err.clone()); + Err(err) + } + } + } + /// Expects the next [`Token`] to be a [`Keyword`] of specific kind, and returns it. /// /// # Errors diff --git a/src/syntax/syntax_tree/condition.rs b/src/syntax/syntax_tree/condition.rs index 84632f8..ebdba7a 100644 --- a/src/syntax/syntax_tree/condition.rs +++ b/src/syntax/syntax_tree/condition.rs @@ -14,7 +14,7 @@ use crate::{ Handler, VoidHandler, }, lexical::{ - token::{Punctuation, StringLiteral, Token}, + token::{Punctuation, Token}, token_stream::Delimiter, }, syntax::{ @@ -23,6 +23,8 @@ use crate::{ }, }; +use super::AnyStringLiteral; + /// Condition that is viewed as a single entity during precedence parsing. 
/// /// Syntax Synopsis: @@ -31,7 +33,7 @@ use crate::{ /// PrimaryCondition: /// UnaryCondition /// | ParenthesizedCondition -/// | StringLiteral +/// | AnyStringLiteral /// ``` #[allow(missing_docs)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -39,7 +41,7 @@ use crate::{ pub enum PrimaryCondition { Unary(UnaryCondition), Parenthesized(ParenthesizedCondition), - StringLiteral(StringLiteral), + StringLiteral(AnyStringLiteral), } impl SourceElement for PrimaryCondition { @@ -354,7 +356,13 @@ impl<'a> Parser<'a> { // string literal Reading::Atomic(Token::StringLiteral(literal)) => { self.forward(); - Ok(PrimaryCondition::StringLiteral(literal)) + Ok(PrimaryCondition::StringLiteral(literal.into())) + } + + // macro string literal + Reading::Atomic(Token::MacroStringLiteral(literal)) => { + self.forward(); + Ok(PrimaryCondition::StringLiteral(literal.into())) } // parenthesized condition diff --git a/src/syntax/syntax_tree/expression.rs b/src/syntax/syntax_tree/expression.rs index 707dff7..cd1b52c 100644 --- a/src/syntax/syntax_tree/expression.rs +++ b/src/syntax/syntax_tree/expression.rs @@ -10,7 +10,9 @@ use crate::{ Handler, }, lexical::{ - token::{Identifier, Keyword, KeywordKind, Punctuation, StringLiteral, Token}, + token::{ + Identifier, Keyword, KeywordKind, MacroStringLiteral, Punctuation, StringLiteral, Token, + }, token_stream::Delimiter, }, syntax::{ @@ -52,6 +54,9 @@ impl SourceElement for Expression { /// ``` ebnf /// Primary: /// FunctionCall +/// | StringLiteral +/// | MacroStringLiteral +/// | LuaCode /// ``` #[allow(missing_docs)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -59,6 +64,7 @@ impl SourceElement for Expression { pub enum Primary { FunctionCall(FunctionCall), StringLiteral(StringLiteral), + MacroStringLiteral(MacroStringLiteral), Lua(Box), } @@ -67,6 +73,7 @@ impl SourceElement for Primary { match self { Self::FunctionCall(function_call) => function_call.span(), 
Self::StringLiteral(string_literal) => string_literal.span(), + Self::MacroStringLiteral(macro_string_literal) => macro_string_literal.span(), Self::Lua(lua_code) => lua_code.span(), } } @@ -180,6 +187,7 @@ impl<'a> Parser<'a> { /// # Errors /// - If the parser is not at a primary expression. /// - If the parser is not at a valid primary expression. + #[expect(clippy::too_many_lines)] pub fn parse_primary(&mut self, handler: &impl Handler) -> ParseResult { match self.stop_at_significant() { // identifier expression @@ -224,6 +232,14 @@ impl<'a> Parser<'a> { Ok(Primary::StringLiteral(literal)) } + // macro string literal expression + Reading::Atomic(Token::MacroStringLiteral(macro_string_literal)) => { + // eat the macro string literal + self.forward(); + + Ok(Primary::MacroStringLiteral(macro_string_literal)) + } + // lua code expression Reading::Atomic(Token::Keyword(lua_keyword)) if lua_keyword.keyword == KeywordKind::Lua => @@ -267,10 +283,11 @@ impl<'a> Parser<'a> { let combined = first .into_token() .and_then(|first| { - first.span().join(&last.into_token().map_or_else( - || first.span().to_owned(), - |last| last.span().to_owned(), - )) + first.span().join( + &last + .into_token() + .map_or_else(|| first.span(), |last| last.span()), + ) }) .expect("Invalid lua code span"); diff --git a/src/syntax/syntax_tree/mod.rs b/src/syntax/syntax_tree/mod.rs index df20696..bba3f3b 100644 --- a/src/syntax/syntax_tree/mod.rs +++ b/src/syntax/syntax_tree/mod.rs @@ -1,5 +1,6 @@ //! Contains the syntax tree nodes that represent the structure of the source code. 
+use derive_more::derive::From; use getset::Getters; use crate::{ @@ -9,7 +10,7 @@ use crate::{ Handler, VoidHandler, }, lexical::{ - token::{Punctuation, Token}, + token::{MacroStringLiteral, Punctuation, StringLiteral, Token}, token_stream::Delimiter, }, syntax::parser::Reading, @@ -64,6 +65,29 @@ pub struct DelimitedList { pub close: Punctuation, } +/// Represents a syntax tree node that can be either a string literal or a macro string literal. +/// +/// Syntax Synopsis: +/// ```ebnf +/// AnyStringLiteral: StringLiteral | MacroStringLiteral ; +/// ``` +#[allow(missing_docs)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)] +pub enum AnyStringLiteral { + StringLiteral(StringLiteral), + MacroStringLiteral(MacroStringLiteral), +} + +impl SourceElement for AnyStringLiteral { + fn span(&self) -> Span { + match self { + Self::StringLiteral(string_literal) => string_literal.span(), + Self::MacroStringLiteral(macro_string_literal) => macro_string_literal.span(), + } + } +} + impl<'a> Parser<'a> { /// Parses a list of elements enclosed by a pair of delimiters, separated by a separator. 
/// diff --git a/src/syntax/syntax_tree/program.rs b/src/syntax/syntax_tree/program.rs index e487d2a..febff77 100644 --- a/src/syntax/syntax_tree/program.rs +++ b/src/syntax/syntax_tree/program.rs @@ -12,7 +12,7 @@ use crate::{ lexical::token::{Keyword, KeywordKind, Punctuation, StringLiteral, Token}, syntax::{ self, - error::{InvalidArgument, ParseResult, SyntaxKind, UnexpectedSyntax}, + error::{ParseResult, SyntaxKind, UnexpectedSyntax}, parser::{Parser, Reading}, }, }; @@ -108,22 +108,7 @@ impl<'a> Parser<'a> { // eat the keyword self.forward(); - let namespace_name = self.parse_string_literal(handler).and_then(|name| { - Namespace::validate_str(name.str_content().as_ref()) - .map(|()| name.clone()) - .map_err(|invalid| { - let err = syntax::error::Error::InvalidArgument(InvalidArgument { - message: format!( - "Invalid characters in namespace '{}'. The following characters are not allowed in namespace definitions: '{}'", - name.str_content(), - invalid - ), - span: name.span(), - }); - handler.receive(err.clone()); - err - }) - })?; + let namespace_name = self.parse_string_literal(handler)?; let semicolon = self.parse_punctuation(';', true, handler)?; diff --git a/src/syntax/syntax_tree/statement/execute_block.rs b/src/syntax/syntax_tree/statement/execute_block.rs index 302ea5d..ef19b7f 100644 --- a/src/syntax/syntax_tree/statement/execute_block.rs +++ b/src/syntax/syntax_tree/statement/execute_block.rs @@ -1,5 +1,7 @@ //! Execute block statement syntax tree. 
+use std::collections::HashSet; + use derive_more::From; use enum_as_inner::EnumAsInner; use getset::Getters; @@ -11,13 +13,13 @@ use crate::{ Handler, VoidHandler, }, lexical::{ - token::{Keyword, KeywordKind, Punctuation, StringLiteral, Token}, + token::{Keyword, KeywordKind, Punctuation, Token}, token_stream::Delimiter, }, syntax::{ error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}, parser::{DelimitedTree, Parser, Reading}, - syntax_tree::condition::ParenthesizedCondition, + syntax_tree::{condition::ParenthesizedCondition, AnyStringLiteral}, }, }; @@ -217,7 +219,7 @@ impl SourceElement for Else { /// /// ```ebnf /// As: -/// 'as' '(' StringLiteral ')' +/// 'as' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -231,7 +233,7 @@ pub struct As { open_paren: Punctuation, /// The selector of the as statement. #[get = "pub"] - as_selector: StringLiteral, + as_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -248,7 +250,7 @@ impl SourceElement for As { impl As { /// Dissolves the [`As`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.as_keyword, self.open_paren, @@ -263,7 +265,7 @@ impl As { /// Syntax Synopsis: /// ```ebnf /// Align: -/// 'align' '(' StringLiteral ')' +/// 'align' '(' AnyStringLiteral ')' /// ; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)] @@ -276,7 +278,7 @@ pub struct Align { open_paren: Punctuation, /// The selector of the align statement. #[get = "pub"] - align_selector: StringLiteral, + align_selector: AnyStringLiteral, /// The close parenthesis. 
#[get = "pub"] close_paren: Punctuation, @@ -294,7 +296,7 @@ impl SourceElement for Align { impl Align { /// Dissolves the [`Align`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.align_keyword, self.open_paren, @@ -309,7 +311,7 @@ impl Align { /// Syntax Synopsis: /// ```ebnf /// Anchored: -/// 'anchored' '(' StringLiteral ')' +/// 'anchored' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -323,7 +325,7 @@ pub struct Anchored { open_paren: Punctuation, /// The selector of the anchored statement. #[get = "pub"] - anchored_selector: StringLiteral, + anchored_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -339,7 +341,7 @@ impl SourceElement for Anchored { impl Anchored { /// Dissolves the [`Anchored`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.anchored_keyword, self.open_paren, @@ -354,7 +356,7 @@ impl Anchored { /// Syntax Synopsis: /// ```ebnf /// AsAt: -/// 'asat' '(' StringLiteral ')' +/// 'asat' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -368,7 +370,7 @@ pub struct AsAt { open_paren: Punctuation, /// The selector of the asat statement. #[get = "pub"] - asat_selector: StringLiteral, + asat_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -384,7 +386,7 @@ impl SourceElement for AsAt { impl AsAt { /// Dissolves the [`AsAt`] into its components. 
#[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.asat_keyword, self.open_paren, @@ -399,7 +401,7 @@ impl AsAt { /// Syntax Synopsis: /// ```ebnf /// At: -/// 'at' '(' StringLiteral ')' +/// 'at' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -413,7 +415,7 @@ pub struct At { open_paren: Punctuation, /// The selector of the at statement. #[get = "pub"] - at_selector: StringLiteral, + at_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -429,7 +431,7 @@ impl SourceElement for At { impl At { /// Dissolves the [`At`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.at_keyword, self.open_paren, @@ -444,7 +446,7 @@ impl At { /// Syntax Synopsis: /// ```ebnf /// Facing: -/// 'facing' '(' StringLiteral ')' +/// 'facing' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -458,7 +460,7 @@ pub struct Facing { open_paren: Punctuation, /// The selector of the facing statement. #[get = "pub"] - facing_selector: StringLiteral, + facing_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -474,7 +476,7 @@ impl SourceElement for Facing { impl Facing { /// Dissolves the [`Facing`] into its components. 
#[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.facing_keyword, self.open_paren, @@ -489,7 +491,7 @@ impl Facing { /// Syntax Synopsis: /// ```ebnf /// In: -/// 'in' '(' StringLiteral ')' +/// 'in' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -503,7 +505,7 @@ pub struct In { open_paren: Punctuation, /// The selector of the in statement. #[get = "pub"] - in_selector: StringLiteral, + in_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -519,7 +521,7 @@ impl SourceElement for In { impl In { /// Dissolves the [`In`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.in_keyword, self.open_paren, @@ -534,7 +536,7 @@ impl In { /// Syntax Synopsis: /// ```ebnf /// On: -/// 'on' '(' StringLiteral ')' +/// 'on' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -548,7 +550,7 @@ pub struct On { open_paren: Punctuation, /// The selector of the on statement. #[get = "pub"] - on_selector: StringLiteral, + on_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -564,7 +566,7 @@ impl SourceElement for On { impl On { /// Dissolves the [`On`] into its components. 
#[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.on_keyword, self.open_paren, @@ -579,7 +581,7 @@ impl On { /// Syntax Synopsis: /// ```ebnf /// Positioned: -/// 'positioned' '(' StringLiteral ')' +/// 'positioned' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -593,7 +595,7 @@ pub struct Positioned { open_paren: Punctuation, /// The selector of the positioned statement. #[get = "pub"] - positioned_selector: StringLiteral, + positioned_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -609,7 +611,7 @@ impl SourceElement for Positioned { impl Positioned { /// Dissolves the [`Positioned`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.positioned_keyword, self.open_paren, @@ -624,7 +626,7 @@ impl Positioned { /// Syntax Synopsis: /// ```ebnf /// Rotated: -/// 'rotated' '(' StringLiteral ')' +/// 'rotated' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -638,7 +640,7 @@ pub struct Rotated { open_paren: Punctuation, /// The selector of the rotated statement. #[get = "pub"] - rotated_selector: StringLiteral, + rotated_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -654,7 +656,7 @@ impl SourceElement for Rotated { impl Rotated { /// Dissolves the [`Rotated`] into its components. 
#[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.rotated_keyword, self.open_paren, @@ -669,7 +671,7 @@ impl Rotated { /// Syntax Synopsis: /// ```ebnf /// Store: -/// 'store' '(' StringLiteral ')' +/// 'store' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -683,7 +685,7 @@ pub struct Store { open_paren: Punctuation, /// The selector of the store statement. #[get = "pub"] - store_selector: StringLiteral, + store_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -699,7 +701,7 @@ impl SourceElement for Store { impl Store { /// Dissolves the [`Store`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.store_keyword, self.open_paren, @@ -714,7 +716,7 @@ impl Store { /// Syntax Synopsis: /// ```ebnf /// Summon: -/// 'summon' '(' StringLiteral ')' +/// 'summon' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -728,7 +730,7 @@ pub struct Summon { open_paren: Punctuation, /// The selector of the summon statement. #[get = "pub"] - summon_selector: StringLiteral, + summon_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -744,7 +746,7 @@ impl SourceElement for Summon { impl Summon { /// Dissolves the [`Summon`] into its components. 
#[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.summon_keyword, self.open_paren, @@ -825,7 +827,7 @@ impl<'a> Parser<'a> { let argument = match self.stop_at_significant() { Reading::IntoDelimited(punc) if punc.punctuation == '(' => self.step_into( Delimiter::Parenthesis, - |parser| parser.parse_string_literal(handler), + |parser| parser.parse_any_string_literal(handler), handler, ), unexpected => { @@ -896,7 +898,7 @@ impl<'a> Parser<'a> { fn head_from_keyword( keyword: Keyword, - argument: DelimitedTree, + argument: DelimitedTree, ) -> ParseResult { Ok(match keyword.keyword { KeywordKind::Align => Align { @@ -986,3 +988,91 @@ fn head_from_keyword( _ => unreachable!("The keyword is not a valid execute block head."), }) } + +/// Trait for the execute block head items with a [`AnyStringLiteral`] as their selector. +pub trait ExecuteBlockHeadItem { + /// Returns a reference to the selector of the execute block head item. + fn selector(&self) -> &AnyStringLiteral; + + /// Analyzes the semantics of the execute block head item. 
+ #[expect(clippy::missing_errors_doc)] + fn analyze_semantics( + &self, + macro_names: &HashSet, + handler: &impl Handler, + ) -> Result<(), crate::semantic::error::Error> { + self.selector().analyze_semantics(macro_names, handler) + } +} + +impl ExecuteBlockHeadItem for Align { + fn selector(&self) -> &AnyStringLiteral { + &self.align_selector + } +} + +impl ExecuteBlockHeadItem for Anchored { + fn selector(&self) -> &AnyStringLiteral { + &self.anchored_selector + } +} + +impl ExecuteBlockHeadItem for As { + fn selector(&self) -> &AnyStringLiteral { + &self.as_selector + } +} + +impl ExecuteBlockHeadItem for At { + fn selector(&self) -> &AnyStringLiteral { + &self.at_selector + } +} + +impl ExecuteBlockHeadItem for AsAt { + fn selector(&self) -> &AnyStringLiteral { + &self.asat_selector + } +} + +impl ExecuteBlockHeadItem for Facing { + fn selector(&self) -> &AnyStringLiteral { + &self.facing_selector + } +} + +impl ExecuteBlockHeadItem for In { + fn selector(&self) -> &AnyStringLiteral { + &self.in_selector + } +} + +impl ExecuteBlockHeadItem for On { + fn selector(&self) -> &AnyStringLiteral { + &self.on_selector + } +} + +impl ExecuteBlockHeadItem for Positioned { + fn selector(&self) -> &AnyStringLiteral { + &self.positioned_selector + } +} + +impl ExecuteBlockHeadItem for Rotated { + fn selector(&self) -> &AnyStringLiteral { + &self.rotated_selector + } +} + +impl ExecuteBlockHeadItem for Store { + fn selector(&self) -> &AnyStringLiteral { + &self.store_selector + } +} + +impl ExecuteBlockHeadItem for Summon { + fn selector(&self) -> &AnyStringLiteral { + &self.summon_selector + } +} diff --git a/src/transpile/conversions.rs b/src/transpile/conversions.rs index 6478113..951e12a 100644 --- a/src/transpile/conversions.rs +++ b/src/transpile/conversions.rs @@ -1,10 +1,19 @@ //! 
Conversion functions for converting between tokens/ast-nodes and [`shulkerbox`] types -use shulkerbox::datapack::Condition as DpCondition; +use shulkerbox::{ + datapack::Condition as DpCondition, + util::{MacroString, MacroStringPart}, +}; -use crate::syntax::syntax_tree::condition::{ - BinaryCondition, Condition, ConditionalBinaryOperator, ConditionalPrefixOperator, - PrimaryCondition, +use crate::{ + lexical::token::{MacroStringLiteral, MacroStringLiteralPart}, + syntax::syntax_tree::{ + condition::{ + BinaryCondition, Condition, ConditionalBinaryOperator, ConditionalPrefixOperator, + PrimaryCondition, + }, + AnyStringLiteral, + }, }; impl From for DpCondition { @@ -19,9 +28,7 @@ impl From for DpCondition { impl From for DpCondition { fn from(value: PrimaryCondition) -> Self { match value { - PrimaryCondition::StringLiteral(literal) => { - Self::Atom(literal.str_content().to_string()) - } + PrimaryCondition::StringLiteral(literal) => Self::Atom(literal.into()), PrimaryCondition::Parenthesized(cond) => cond.dissolve().1.into(), PrimaryCondition::Unary(prefix) => match prefix.operator() { ConditionalPrefixOperator::LogicalNot(_) => { @@ -32,6 +39,56 @@ impl From for DpCondition { } } +impl From<&AnyStringLiteral> for MacroString { + fn from(value: &AnyStringLiteral) -> Self { + match value { + AnyStringLiteral::StringLiteral(literal) => Self::from(literal.str_content().as_ref()), + AnyStringLiteral::MacroStringLiteral(literal) => Self::from(literal), + } + } +} + +impl From for MacroString { + fn from(value: AnyStringLiteral) -> Self { + Self::from(&value) + } +} + +impl From<&MacroStringLiteral> for MacroString { + fn from(value: &MacroStringLiteral) -> Self { + if value + .parts() + .iter() + .any(|p| matches!(p, MacroStringLiteralPart::MacroUsage { .. 
})) + { + Self::MacroString( + value + .parts() + .iter() + .map(|part| match part { + MacroStringLiteralPart::Text(span) => MacroStringPart::String( + crate::util::unescape_macro_string(span.str()).to_string(), + ), + MacroStringLiteralPart::MacroUsage { identifier, .. } => { + MacroStringPart::MacroUsage( + super::util::identifier_to_macro(identifier.span.str()).to_string(), + ) + } + }) + .collect(), + ) + } else { + Self::String(value.str_content()) + } + } +} + +impl From for MacroString { + fn from(value: MacroStringLiteral) -> Self { + Self::from(&value) + } +} + impl From for DpCondition { fn from(value: BinaryCondition) -> Self { let (lhs, op, rhs) = value.dissolve(); diff --git a/src/transpile/error.rs b/src/transpile/error.rs index abdf88d..2dd1b9c 100644 --- a/src/transpile/error.rs +++ b/src/transpile/error.rs @@ -8,9 +8,9 @@ use itertools::Itertools; use crate::{ base::{ log::{Message, Severity, SourceCodeDisplay}, - source_file::{SourceElement, Span}, + source_file::Span, }, - syntax::syntax_tree::expression::Expression, + semantic::error::{ConflictingFunctionNames, InvalidFunctionArguments, UnexpectedExpression}, }; use super::FunctionData; @@ -29,6 +29,8 @@ pub enum TranspileError { LuaRuntimeError(#[from] LuaRuntimeError), #[error(transparent)] ConflictingFunctionNames(#[from] ConflictingFunctionNames), + #[error(transparent)] + InvalidFunctionArguments(#[from] InvalidFunctionArguments), } /// The result of a transpilation operation. @@ -144,52 +146,3 @@ impl LuaRuntimeError { } } } - -/// An error that occurs when a function declaration is missing. 
-#[derive(Debug, Clone, PartialEq, Eq)] -pub struct UnexpectedExpression(pub Expression); - -impl Display for UnexpectedExpression { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - Message::new(Severity::Error, "encountered unexpected expression") - )?; - - write!( - f, - "\n{}", - SourceCodeDisplay::new(&self.0.span(), Option::::None) - ) - } -} - -impl std::error::Error for UnexpectedExpression {} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ConflictingFunctionNames { - pub definition: Span, - pub name: String, -} - -impl Display for ConflictingFunctionNames { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - Message::new( - Severity::Error, - format!("the following function declaration conflicts with an existing function with name `{}`", self.name) - ) - )?; - - write!( - f, - "\n{}", - SourceCodeDisplay::new(&self.definition, Option::::None) - ) - } -} - -impl std::error::Error for ConflictingFunctionNames {} diff --git a/src/transpile/mod.rs b/src/transpile/mod.rs index 6ca242f..cdc3edd 100644 --- a/src/transpile/mod.rs +++ b/src/transpile/mod.rs @@ -20,13 +20,13 @@ mod transpiler; #[cfg_attr(feature = "shulkerbox", doc(inline))] pub use transpiler::Transpiler; -#[cfg(feature = "shulkerbox")] -mod util; +pub mod util; #[derive(Debug, Clone, PartialEq, Eq)] pub(super) struct FunctionData { pub(super) namespace: String, pub(super) identifier_span: Span, + pub(super) parameters: Vec, pub(super) statements: Vec, pub(super) public: bool, pub(super) annotations: HashMap>, diff --git a/src/transpile/transpiler.rs b/src/transpile/transpiler.rs index beb5453..9830c6b 100644 --- a/src/transpile/transpiler.rs +++ b/src/transpile/transpiler.rs @@ -3,7 +3,7 @@ use chksum_md5 as md5; use std::{ collections::{BTreeMap, HashMap}, - iter, + ops::Deref, }; use shulkerbox::datapack::{self, Command, Datapack, Execute}; @@ -14,6 +14,7 @@ use crate::{ source_file::{SourceElement, 
Span}, Handler, }, + semantic::error::{ConflictingFunctionNames, InvalidFunctionArguments, UnexpectedExpression}, syntax::syntax_tree::{ declaration::{Declaration, ImportItems}, expression::{Expression, FunctionCall, Primary}, @@ -23,11 +24,11 @@ use crate::{ Statement, }, }, - transpile::error::{ConflictingFunctionNames, MissingFunctionDeclaration}, + transpile::error::MissingFunctionDeclaration, }; use super::{ - error::{TranspileError, TranspileResult, UnexpectedExpression}, + error::{TranspileError, TranspileResult}, FunctionData, }; @@ -97,7 +98,7 @@ impl Transpiler { ); for identifier_span in always_transpile_functions { - self.get_or_transpile_function(&identifier_span, handler)?; + self.get_or_transpile_function(&identifier_span, None, handler)?; } Ok(()) @@ -122,7 +123,7 @@ impl Transpiler { &mut self, declaration: &Declaration, namespace: &Namespace, - _handler: &impl Handler, + handler: &impl Handler, ) { let program_identifier = declaration.span().source_file().identifier().clone(); match declaration { @@ -148,6 +149,15 @@ impl Transpiler { FunctionData { namespace: namespace.namespace_name().str_content().to_string(), identifier_span: identifier_span.clone(), + parameters: function + .parameters() + .as_ref() + .map(|l| { + l.elements() + .map(|i| i.span.str().to_string()) + .collect::>() + }) + .unwrap_or_default(), statements, public: function.is_public(), annotations, @@ -162,12 +172,13 @@ impl Transpiler { let aliases = &mut self.aliases; match import.items() { - ImportItems::All(_) => todo!("Importing all items is not yet supported."), + ImportItems::All(_) => { + handler.receive(base::Error::Other( + "Importing all items is not yet supported.".to_string(), + )); + } ImportItems::Named(list) => { - let items = iter::once(list.first()) - .chain(list.rest().iter().map(|(_, ident)| ident)); - - for item in items { + for item in list.elements() { let name = item.span.str(); aliases.insert( (program_identifier.clone(), name.to_string()), @@ -203,8 
+214,9 @@ impl Transpiler { fn get_or_transpile_function( &mut self, identifier_span: &Span, + arguments: Option<&[&Expression]>, handler: &impl Handler, - ) -> TranspileResult { + ) -> TranspileResult<(String, Option>)> { let program_identifier = identifier_span.source_file().identifier(); let program_query = ( program_identifier.to_string(), @@ -244,6 +256,7 @@ impl Transpiler { handler.receive(error.clone()); error })?; + function_data.statements.clone() }; let commands = self.transpile_function(&statements, program_identifier, handler)?; @@ -315,10 +328,35 @@ impl Transpiler { ); } - let locations = &self.function_locations; - locations + let parameters = { + let function_data = self + .functions + .get(&program_query) + .or_else(|| { + alias_query + .clone() + .and_then(|q| self.functions.get(&q).filter(|f| f.public)) + }) + .ok_or_else(|| { + let error = TranspileError::MissingFunctionDeclaration( + MissingFunctionDeclaration::from_context( + identifier_span.clone(), + &self.functions, + ), + ); + handler.receive(error.clone()); + error + })?; + + function_data.parameters.clone() + }; + + let function_location = self + .function_locations .get(&program_query) - .or_else(|| alias_query.and_then(|q| locations.get(&q).filter(|(_, p)| *p))) + .or_else(|| { + alias_query.and_then(|q| self.function_locations.get(&q).filter(|(_, p)| *p)) + }) .ok_or_else(|| { let error = TranspileError::MissingFunctionDeclaration( MissingFunctionDeclaration::from_context( @@ -329,7 +367,57 @@ impl Transpiler { handler.receive(error.clone()); error }) - .map(|(s, _)| s.to_owned()) + .map(|(s, _)| s.to_owned())?; + + let arg_count = arguments.iter().flat_map(|x| x.iter()).count(); + if arg_count != parameters.len() { + let err = TranspileError::InvalidFunctionArguments(InvalidFunctionArguments { + expected: parameters.len(), + actual: arg_count, + span: identifier_span.clone(), + }); + handler.receive(err.clone()); + Err(err) + } else if arg_count > 0 { + let mut compiled_args = 
Vec::new(); + let mut errs = Vec::new(); + for expression in arguments.iter().flat_map(|x| x.iter()) { + let value = match expression { + Expression::Primary(Primary::FunctionCall(func)) => self + .transpile_function_call(func, handler) + .map(|cmd| match cmd { + Command::Raw(s) => s, + _ => unreachable!("Function call should always return a raw command"), + }), + Expression::Primary(Primary::Lua(lua)) => { + lua.eval_string(handler).map(Option::unwrap_or_default) + } + Expression::Primary(Primary::StringLiteral(string)) => { + Ok(string.str_content().to_string()) + } + Expression::Primary(Primary::MacroStringLiteral(literal)) => { + Ok(literal.str_content()) + } + }; + + match value { + Ok(value) => { + compiled_args.push(value); + } + Err(err) => { + compiled_args.push(String::new()); + errs.push(err.clone()); + } + } + } + if let Some(err) = errs.first() { + return Err(err.clone()); + } + let function_args = parameters.into_iter().zip(compiled_args).collect(); + Ok((function_location, Some(function_args))) + } else { + Ok((function_location, None)) + } } fn transpile_function( @@ -374,6 +462,9 @@ impl Transpiler { Expression::Primary(Primary::StringLiteral(string)) => { Ok(Some(Command::Raw(string.str_content().to_string()))) } + Expression::Primary(Primary::MacroStringLiteral(string)) => { + Ok(Some(Command::UsesMacro(string.into()))) + } Expression::Primary(Primary::Lua(code)) => { Ok(code.eval_string(handler)?.map(Command::Raw)) } @@ -431,8 +522,29 @@ impl Transpiler { func: &FunctionCall, handler: &impl Handler, ) -> TranspileResult { - let location = self.get_or_transpile_function(&func.identifier().span, handler)?; - Ok(Command::Raw(format!("function {location}"))) + let arguments = func + .arguments() + .as_ref() + .map(|l| l.elements().map(Deref::deref).collect::>()); + let (location, arguments) = + self.get_or_transpile_function(&func.identifier().span, arguments.as_deref(), handler)?; + let mut function_call = format!("function {location}"); + if let 
Some(arguments) = arguments { + use std::fmt::Write; + let arguments = arguments + .iter() + .map(|(ident, v)| { + format!( + r#"{macro_name}:"{escaped}""#, + macro_name = super::util::identifier_to_macro(ident), + escaped = crate::util::escape_str(v) + ) + }) + .collect::>() + .join(","); + write!(function_call, " {{{arguments}}}").unwrap(); + } + Ok(Command::Raw(function_call)) } fn transpile_execute_block( @@ -595,53 +707,53 @@ impl Transpiler { None } } - ExecuteBlockHead::As(as_) => { - let selector = as_.as_selector().str_content(); - tail.map(|tail| Execute::As(selector.to_string(), Box::new(tail))) + ExecuteBlockHead::As(r#as) => { + let selector = r#as.as_selector(); + tail.map(|tail| Execute::As(selector.into(), Box::new(tail))) } ExecuteBlockHead::At(at) => { - let selector = at.at_selector().str_content(); - tail.map(|tail| Execute::At(selector.to_string(), Box::new(tail))) + let selector = at.at_selector(); + tail.map(|tail| Execute::At(selector.into(), Box::new(tail))) } ExecuteBlockHead::Align(align) => { - let align = align.align_selector().str_content(); - tail.map(|tail| Execute::Align(align.to_string(), Box::new(tail))) + let align = align.align_selector(); + tail.map(|tail| Execute::Align(align.into(), Box::new(tail))) } ExecuteBlockHead::Anchored(anchored) => { - let anchor = anchored.anchored_selector().str_content(); - tail.map(|tail| Execute::Anchored(anchor.to_string(), Box::new(tail))) + let anchor = anchored.anchored_selector(); + tail.map(|tail| Execute::Anchored(anchor.into(), Box::new(tail))) } - ExecuteBlockHead::In(in_) => { - let dimension = in_.in_selector().str_content(); - tail.map(|tail| Execute::In(dimension.to_string(), Box::new(tail))) + ExecuteBlockHead::In(r#in) => { + let dimension = r#in.in_selector(); + tail.map(|tail| Execute::In(dimension.into(), Box::new(tail))) } ExecuteBlockHead::Positioned(positioned) => { - let position = positioned.positioned_selector().str_content(); - tail.map(|tail| 
Execute::Positioned(position.to_string(), Box::new(tail))) + let position = positioned.positioned_selector(); + tail.map(|tail| Execute::Positioned(position.into(), Box::new(tail))) } ExecuteBlockHead::Rotated(rotated) => { - let rotation = rotated.rotated_selector().str_content(); - tail.map(|tail| Execute::Rotated(rotation.to_string(), Box::new(tail))) + let rotation = rotated.rotated_selector(); + tail.map(|tail| Execute::Rotated(rotation.into(), Box::new(tail))) } ExecuteBlockHead::Facing(facing) => { - let facing = facing.facing_selector().str_content(); - tail.map(|tail| Execute::Facing(facing.to_string(), Box::new(tail))) + let facing = facing.facing_selector(); + tail.map(|tail| Execute::Facing(facing.into(), Box::new(tail))) } ExecuteBlockHead::AsAt(as_at) => { - let selector = as_at.asat_selector().str_content(); - tail.map(|tail| Execute::AsAt(selector.to_string(), Box::new(tail))) + let selector = as_at.asat_selector(); + tail.map(|tail| Execute::AsAt(selector.into(), Box::new(tail))) } ExecuteBlockHead::On(on) => { - let dimension = on.on_selector().str_content(); - tail.map(|tail| Execute::On(dimension.to_string(), Box::new(tail))) + let dimension = on.on_selector(); + tail.map(|tail| Execute::On(dimension.into(), Box::new(tail))) } ExecuteBlockHead::Store(store) => { - let store = store.store_selector().str_content(); - tail.map(|tail| Execute::Store(store.to_string(), Box::new(tail))) + let store = store.store_selector(); + tail.map(|tail| Execute::Store(store.into(), Box::new(tail))) } ExecuteBlockHead::Summon(summon) => { - let entity = summon.summon_selector().str_content(); - tail.map(|tail| Execute::Summon(entity.to_string(), Box::new(tail))) + let entity = summon.summon_selector(); + tail.map(|tail| Execute::Summon(entity.into(), Box::new(tail))) } }) } diff --git a/src/transpile/util.rs b/src/transpile/util.rs index d170c91..bcd5f22 100644 --- a/src/transpile/util.rs +++ b/src/transpile/util.rs @@ -1,3 +1,8 @@ +//! 
/// Transforms an identifier to a macro name that only contains `a-zA-Z0-9_`.
///
/// An identifier that already consists solely of ASCII alphanumerics and
/// underscores, and does not contain `__`, is returned unchanged (borrowed).
///
/// Any other identifier has its disallowed characters removed and gets `__`
/// plus the first eight hex digits of the MD5 hash of the *original*
/// identifier appended. Because unchanged names never contain `__`, a
/// rewritten name cannot collide with an unchanged one.
#[cfg(feature = "shulkerbox")]
#[must_use]
pub fn identifier_to_macro(ident: &str) -> std::borrow::Cow<'_, str> {
    // A character is allowed if it is an underscore OR ASCII alphanumeric.
    // (The previous `&&` could never be true, so every identifier was hashed.)
    let is_macro_char = |c: char| c == '_' || c.is_ascii_alphanumeric();

    if ident.contains("__") || !ident.chars().all(is_macro_char) {
        let new_ident = ident
            .chars()
            .filter(|c| is_macro_char(*c))
            .collect::<String>();

        // Hash the original identifier so that different inputs which sanitize
        // to the same text still receive distinct macro names.
        let chksum = md5::hash(ident).to_hex_lowercase();

        // The hex digest is ASCII, so slicing the first 8 bytes is safe.
        std::borrow::Cow::Owned(new_ident + "__" + &chksum[..8])
    } else {
        std::borrow::Cow::Borrowed(ident)
    }
}
/// Unescapes ``\` ``, `\\`, `\n`, `\r` and `\t` in a string.
///
/// The input is decoded in a single left-to-right pass, so every escape
/// sequence is interpreted exactly once. In particular an escaped backslash
/// followed by `n` (`\\n`) yields a literal backslash followed by `n`, not a
/// newline.
///
/// A backslash followed by any other character, or a backslash at the very end
/// of the input, is kept as-is.
///
/// Returns a borrowed [`Cow`] when the input contains no backslash and
/// therefore needs no rewriting.
#[must_use]
pub fn unescape_macro_string(s: &str) -> Cow<'_, str> {
    // Without a backslash there is nothing to unescape.
    if !s.contains('\\') {
        return Cow::Borrowed(s);
    }

    // The result is never longer than the input.
    let mut unescaped = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(c) = chars.next() {
        if c != '\\' {
            unescaped.push(c);
            continue;
        }

        // Consume the character following the backslash and decode the pair.
        match chars.next() {
            Some('n') => unescaped.push('\n'),
            Some('r') => unescaped.push('\r'),
            Some('t') => unescaped.push('\t'),
            Some('`') => unescaped.push('`'),
            Some('\\') => unescaped.push('\\'),
            // Unknown escape: keep both characters untouched.
            Some(other) => {
                unescaped.push('\\');
                unescaped.push(other);
            }
            // Trailing lone backslash: keep it.
            None => unescaped.push('\\'),
        }
    }

    Cow::Owned(unescaped)
}