From b2cc3bb4c7c1d585be615adc5ac7fef49885badc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Sun, 10 Nov 2024 16:04:10 +0100 Subject: [PATCH] implement first version of macros --- Cargo.toml | 5 +- grammar.md | 19 +- src/base/source_file.rs | 20 ++ src/lexical/token.rs | 314 ++++++++++++++++-- src/lexical/token_stream.rs | 24 +- src/syntax/error.rs | 9 +- src/syntax/parser.rs | 53 ++- src/syntax/syntax_tree/condition.rs | 16 +- src/syntax/syntax_tree/expression.rs | 27 +- src/syntax/syntax_tree/mod.rs | 26 +- .../syntax_tree/statement/execute_block.rs | 80 ++--- src/transpile/conversions.rs | 69 +++- src/transpile/lua.rs | 3 +- src/transpile/transpiler.rs | 55 +-- tests/transpiling/main.rs | 4 +- 15 files changed, 605 insertions(+), 119 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 541c098..9b997de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,11 +32,12 @@ derive_more = { version = "1.0.0", default-features = false, features = ["deref" enum-as-inner = "0.6.0" getset = "0.1.2" itertools = "0.13.0" -mlua = { version = "0.10.0", features = ["lua54", "vendored"], optional = true } +mlua = { version = "0.10.1", features = ["lua54", "vendored"], optional = true } path-absolutize = "3.1.1" pathdiff = "0.2.2" serde = { version = "1.0.214", features = ["derive", "rc"], optional = true } -shulkerbox = { version = "0.1.0", default-features = false, optional = true } +# shulkerbox = { version = "0.1.0", default-features = false, optional = true } +shulkerbox = { git = "https://github.com/moritz-hoelting/shulkerbox", rev = "8f05fef7030d3e999a07d621ba581ebbb205dadc", default-features = false, optional = true } strsim = "0.11.1" strum = { version = "0.26.2", features = ["derive"] } strum_macros = "0.26.4" diff --git a/grammar.md b/grammar.md index 0419177..a17cc85 100644 --- a/grammar.md +++ b/grammar.md @@ -12,6 +12,21 @@ Program: Namespace Declaration*; Namespace: 'namespace' StringLiteral; ``` +### StringLiteral +```ebnf +StringLiteral: '"' TEXT '"'; +``` + +### MacroStringLiteral +```ebnf +MacroStringLiteral: '`' ( TEXT | '$(' [a-zA-Z0-9_]+ ')' )* '`'; +``` + +### AnyStringLiteral +```ebnf +AnyStringLiteral: StringLiteral | MacroStringLiteral; +``` + ### Declaration ```ebnf Declaration: FunctionDeclaration | Import | TagDeclaration; @@ -87,7 +102,7 @@ Condition: PrimaryCondition: ConditionalPrefix | ParenthesizedCondition - | StringLiteral + | AnyStringLiteral ; ``` @@ -144,6 +159,8 @@ Expression: ```ebnf Primary: FunctionCall + | AnyStringLiteral + | LuaCode ; ``` diff --git a/src/base/source_file.rs b/src/base/source_file.rs index 369e210..f31a114 100644 --- a/src/base/source_file.rs +++ b/src/base/source_file.rs @@ -253,6 +253,26 @@ impl Span { }) } + /// Create a span from the given start byte index to the end of the source file with an offset. + #[must_use] + pub fn to_end_with_offset( + source_file: Arc, + start: usize, + end_offset: isize, + ) -> Option { + if !source_file.content().is_char_boundary(start) { + return None; + } + Some(Self { + start, + end: source_file + .content() + .len() + .saturating_add_signed(end_offset), + source_file, + }) + } + /// Get the string slice of the source code that the span represents. #[must_use] pub fn str(&self) -> &str { diff --git a/src/lexical/token.rs b/src/lexical/token.rs index 253bec0..67a187c 100644 --- a/src/lexical/token.rs +++ b/src/lexical/token.rs @@ -4,6 +4,7 @@ use std::{borrow::Cow, collections::HashMap, fmt::Display, str::FromStr, sync::O use crate::base::{ self, + log::SourceCodeDisplay, source_file::{SourceElement, SourceIterator, Span}, Handler, }; @@ -146,24 +147,7 @@ pub enum Token { DocComment(DocComment), CommandLiteral(CommandLiteral), StringLiteral(StringLiteral), -} - -impl Token { - /// Returns the span of the token. - #[must_use] - pub fn span(&self) -> &Span { - match self { - Self::WhiteSpaces(token) => &token.span, - Self::Identifier(token) => &token.span, - Self::Keyword(token) => &token.span, - Self::Punctuation(token) => &token.span, - Self::Numeric(token) => &token.span, - Self::Comment(token) => &token.span, - Self::DocComment(token) => &token.span, - Self::CommandLiteral(token) => &token.span, - Self::StringLiteral(token) => &token.span, - } - } + MacroStringLiteral(MacroStringLiteral), } impl SourceElement for Token { @@ -178,6 +162,7 @@ impl SourceElement for Token { Self::DocComment(token) => token.span(), Self::CommandLiteral(token) => token.span(), Self::StringLiteral(token) => token.span(), + Self::MacroStringLiteral(token) => token.span(), } } } @@ -291,6 +276,67 @@ impl SourceElement for StringLiteral { } } +/// Represents a hardcoded macro string literal value in the source code. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MacroStringLiteral { + /// The backtick that starts the macro string literal. + starting_backtick: Punctuation, + /// The parts that make up the macro string literal. + parts: Vec, + /// The backtick that ends the macro string literal. + ending_backtick: Punctuation, +} + +impl MacroStringLiteral { + /// Returns the string content without escapement characters, leading and trailing double quotes. + #[must_use] + pub fn str_content(&self) -> String { + let span = self.span(); + let string = span.str(); + let string = &string[1..string.len() - 1]; + if string.contains('\\') { + string + .replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\\"", "\"") + .replace("\\\\", "\\") + } else { + string.to_string() + } + } + + /// Returns the parts that make up the macro string literal. + #[must_use] + pub fn parts(&self) -> &[MacroStringLiteralPart] { + &self.parts + } +} + +impl SourceElement for MacroStringLiteral { + fn span(&self) -> Span { + self.starting_backtick + .span + .join(&self.ending_backtick.span) + .expect("Invalid macro string literal span") + } +} + +/// Represents a part of a macro string literal value in the source code. +#[allow(missing_docs)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum MacroStringLiteralPart { + Text(Span), + MacroUsage { + dollar: Punctuation, + open_brace: Punctuation, + identifier: Identifier, + close_brace: Punctuation, + }, +} + /// Is an enumeration representing the two kinds of comments in the Shulkerscript programming language. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -363,7 +409,7 @@ impl CommandLiteral { } /// Is an error that can occur when invoking the [`Token::tokenize`] method. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error)] #[allow(missing_docs)] pub enum TokenizeError { #[error("encountered a fatal lexical error that causes the process to stop.")] @@ -371,8 +417,95 @@ pub enum TokenizeError { #[error("the iterator argument is at the end of the source code.")] EndOfSourceCodeIteratorArgument, + + #[error(transparent)] + InvalidMacroNameCharacter(#[from] InvalidMacroNameCharacter), + + #[error(transparent)] + UnclosedMacroUsage(#[from] UnclosedMacroUsage), + + #[error(transparent)] + EmptyMacroUsage(#[from] EmptyMacroUsage), } +/// Is an error that can occur when the macro name contains invalid characters. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct InvalidMacroNameCharacter { + /// The span of the invalid characters. + pub span: Span, +} + +impl Display for InvalidMacroNameCharacter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + base::log::Message::new(base::log::Severity::Error, format!("The macro name contains invalid characters: `{}`. Only alphanumeric characters and underscores are allowed.", self.span.str())) + )?; + write!( + f, + "\n{}", + SourceCodeDisplay::new(&self.span, Option::::None) + ) + } +} + +impl std::error::Error for InvalidMacroNameCharacter {} + +/// Is an error that can occur when the macro usage is not closed. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct UnclosedMacroUsage { + /// The span of the unclosed macro usage. + pub span: Span, +} + +impl Display for UnclosedMacroUsage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + base::log::Message::new( + base::log::Severity::Error, + "A macro usage was opened with `$(` but never closed." + ) + )?; + write!( + f, + "\n{}", + SourceCodeDisplay::new(&self.span, Option::::None) + ) + } +} + +impl std::error::Error for UnclosedMacroUsage {} + +/// Is an error that can occur when the macro usage is not closed. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct EmptyMacroUsage { + /// The span of the unclosed macro usage. + pub span: Span, +} + +impl Display for EmptyMacroUsage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + base::log::Message::new( + base::log::Severity::Error, + "A macro usage was opened with `$(` but closed immediately with `)`." + ) + )?; + write!( + f, + "\n{}", + SourceCodeDisplay::new(&self.span, Option::::None) + ) + } +} + +impl std::error::Error for EmptyMacroUsage {} + impl Token { /// Increments the iterator while the predicate returns true. pub fn walk_iter(iter: &mut SourceIterator, predicate: impl Fn(char) -> bool) { @@ -386,6 +519,7 @@ impl Token { } /// Creates a span from the given start location to the current location of the iterator. + #[must_use] fn create_span(start: usize, iter: &mut SourceIterator) -> Span { iter.peek().map_or_else( || Span::to_end(iter.source_file().clone(), start).unwrap(), @@ -393,6 +527,26 @@ impl Token { ) } + /// Creates a span from the given start location to the current location of the iterator with the given offset. + #[must_use] + fn create_span_with_end_offset( + start: usize, + iter: &mut SourceIterator, + end_offset: isize, + ) -> Span { + iter.peek().map_or_else( + || Span::to_end_with_offset(iter.source_file().clone(), start, end_offset).unwrap(), + |(index, _)| { + Span::new( + iter.source_file().clone(), + start, + index.saturating_add_signed(end_offset), + ) + .unwrap() + }, + ) + } + /// Checks if the given character is a valid first character of an identifier. fn is_first_identifier_character(character: char) -> bool { character == '_' @@ -552,6 +706,120 @@ impl Token { .into() } + /// Handles a sequence of characters that are enclosed in backticks and contain macro usages + fn handle_macro_string_literal( + iter: &mut SourceIterator, + mut start: usize, + ) -> Result { + let mut is_escaped = false; + let mut is_inside_macro = false; + let mut encountered_open_parenthesis = false; + let starting_backtick = Punctuation { + span: Self::create_span(start, iter), + punctuation: '`', + }; + start += 1; + let mut parts = Vec::new(); + + while iter.peek().is_some() { + let (index, character) = iter.next().unwrap(); + + #[expect(clippy::collapsible_else_if)] + if is_inside_macro { + if character == ')' { + // Check if the macro usage is empty + if start + 2 == index { + return Err(EmptyMacroUsage { + span: Span::new(iter.source_file().clone(), start, index + 1).unwrap(), + } + .into()); + } + parts.push(MacroStringLiteralPart::MacroUsage { + dollar: Punctuation { + span: Span::new(iter.source_file().clone(), start, start + 1).unwrap(), + punctuation: '$', + }, + open_brace: Punctuation { + span: Span::new(iter.source_file().clone(), start + 1, start + 2) + .unwrap(), + punctuation: '(', + }, + identifier: Identifier { + span: Self::create_span_with_end_offset(start + 2, iter, -1), + }, + close_brace: Punctuation { + span: Span::new(iter.source_file().clone(), index, index + 1).unwrap(), + punctuation: ')', + }, + }); + start = index + 1; + is_inside_macro = false; + } else if !encountered_open_parenthesis && character == '(' { + encountered_open_parenthesis = true; + } else if encountered_open_parenthesis + && !Self::is_valid_macro_name_character(character) + { + if character == '`' { + return Err(UnclosedMacroUsage { + span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(), + } + .into()); + } + + Self::walk_iter(iter, |c| { + c != ')' && !Self::is_valid_macro_name_character(c) + }); + return Err(InvalidMacroNameCharacter { + span: Self::create_span(index, iter), + } + .into()); + } + } else { + if character == '$' && iter.peek().is_some_and(|(_, c)| c == '(') { + parts.push(MacroStringLiteralPart::Text( + Self::create_span_with_end_offset(start, iter, -1), + )); + start = index; + is_inside_macro = true; + encountered_open_parenthesis = false; + } else if character == '\\' { + is_escaped = !is_escaped; + } else if character == '`' && !is_escaped { + if start != index { + parts.push(MacroStringLiteralPart::Text( + Self::create_span_with_end_offset(start, iter, -1), + )); + } + start = index; + break; + } else { + is_escaped = false; + } + } + } + + if is_inside_macro { + Err(UnclosedMacroUsage { + span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(), + } + .into()) + } else { + Ok(MacroStringLiteral { + starting_backtick, + parts, + ending_backtick: Punctuation { + span: Self::create_span(start, iter), + punctuation: '`', + }, + } + .into()) + } + } + + fn is_valid_macro_name_character(character: char) -> bool { + character.is_ascii_alphanumeric() || character == '_' + } + /// Handles a command that is preceeded by a slash fn handle_command_literal(iter: &mut SourceIterator, start: usize) -> Self { Self::walk_iter(iter, |c| !(c.is_whitespace() && c.is_ascii_control())); @@ -593,9 +861,15 @@ impl Token { // Found comment/single slash punctuation else if character == '/' { Self::handle_comment(iter, start, character, prev_token, handler) - } else if character == '"' { + } + // Found string literal + else if character == '"' { Ok(Self::handle_string_literal(iter, start)) } + // Found macro string literal + else if character == '`' { + Self::handle_macro_string_literal(iter, start) + } // Found numeric literal else if character.is_ascii_digit() { Ok(Self::handle_numeric_literal(iter, start)) diff --git a/src/lexical/token_stream.rs b/src/lexical/token_stream.rs index 1e3fb99..9502e4a 100644 --- a/src/lexical/token_stream.rs +++ b/src/lexical/token_stream.rs @@ -5,10 +5,13 @@ use std::{fmt::Debug, sync::Arc}; use derive_more::{Deref, From}; use enum_as_inner::EnumAsInner; -use crate::base::{ - self, - source_file::{SourceElement, SourceFile, Span}, - Handler, +use crate::{ + base::{ + self, + source_file::{SourceElement, SourceFile, Span}, + Handler, + }, + lexical::Error, }; use super::{ @@ -62,6 +65,17 @@ impl TokenStream { Err(TokenizeError::FatalLexicalError) => { tracing::error!("Fatal lexical error encountered while tokenizing source code"); } + Err(TokenizeError::InvalidMacroNameCharacter(err)) => { + handler.receive(Error::TokenizeError( + TokenizeError::InvalidMacroNameCharacter(err), + )); + } + Err(TokenizeError::UnclosedMacroUsage(err)) => { + handler.receive(Error::TokenizeError(TokenizeError::UnclosedMacroUsage(err))); + } + Err(TokenizeError::EmptyMacroUsage(err)) => { + handler.receive(Error::TokenizeError(TokenizeError::EmptyMacroUsage(err))); + } } } @@ -184,7 +198,7 @@ pub enum TokenTree { impl SourceElement for TokenTree { fn span(&self) -> Span { match self { - Self::Token(token) => token.span().to_owned(), + Self::Token(token) => token.span(), Self::Delimited(delimited) => delimited .open .span() diff --git a/src/syntax/error.rs b/src/syntax/error.rs index 4f543ec..a69883a 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -5,7 +5,7 @@ use std::fmt::Display; use crate::{ base::{ log::{Message, Severity, SourceCodeDisplay}, - source_file::Span, + source_file::{SourceElement as _, Span}, }, lexical::token::{KeywordKind, Token}, }; @@ -34,6 +34,8 @@ pub enum SyntaxKind { Declaration, Numeric, StringLiteral, + MacroStringLiteral, + AnyStringLiteral, Statement, Expression, Type, @@ -69,6 +71,8 @@ impl SyntaxKind { Self::Declaration => "a declaration token".to_string(), Self::Numeric => "a numeric token".to_string(), Self::StringLiteral => "a string literal".to_string(), + Self::MacroStringLiteral => "a macro string literal".to_string(), + Self::AnyStringLiteral => "a (macro) string literal".to_string(), Self::Statement => "a statement syntax".to_string(), Self::Expression => "an expression syntax".to_string(), Self::Type => "a type syntax".to_string(), @@ -105,6 +109,7 @@ impl Display for UnexpectedSyntax { Some(Token::Numeric(..)) => "a numeric token".to_string(), Some(Token::CommandLiteral(..)) => "a literal command token".to_string(), Some(Token::StringLiteral(..)) => "a string literal token".to_string(), + Some(Token::MacroStringLiteral(..)) => "a macro string literal token".to_string(), None => "EOF".to_string(), }; @@ -117,7 +122,7 @@ impl Display for UnexpectedSyntax { write!( f, "\n{}", - SourceCodeDisplay::new(span.span(), Option::::None) + SourceCodeDisplay::new(&span.span(), Option::::None) ) }) } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index b2e14b4..bf2460d 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -6,12 +6,18 @@ use enum_as_inner::EnumAsInner; use crate::{ base::{self, Handler}, lexical::{ - token::{Identifier, Keyword, KeywordKind, Numeric, Punctuation, StringLiteral, Token}, + token::{ + Identifier, Keyword, KeywordKind, MacroStringLiteral, Numeric, Punctuation, + StringLiteral, Token, + }, token_stream::{Delimited, Delimiter, TokenStream, TokenTree}, }, }; -use super::error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}; +use super::{ + error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}, + syntax_tree::AnyStringLiteral, +}; /// Represents a parser that reads a token stream and constructs an abstract syntax tree. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut)] @@ -432,6 +438,49 @@ impl<'a> Frame<'a> { } } + /// Expects the next [`Token`] to be an [`MacroStringLiteral`], and returns it. + /// + /// # Errors + /// If the next [`Token`] is not an [`MacroStringLiteral`]. + pub fn parse_macro_string_literal( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + match self.next_significant_token() { + Reading::Atomic(Token::MacroStringLiteral(literal)) => Ok(literal), + found => { + let err = Error::UnexpectedSyntax(UnexpectedSyntax { + expected: SyntaxKind::MacroStringLiteral, + found: found.into_token(), + }); + handler.receive(err.clone()); + Err(err) + } + } + } + + /// Expects the next [`Token`] to be an [`AnyStringLiteral`], and returns it. + /// + /// # Errors + /// If the next [`Token`] is not an [`AnyStringLiteral`]. + pub fn parse_any_string_literal( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + match self.next_significant_token() { + Reading::Atomic(Token::StringLiteral(literal)) => Ok(literal.into()), + Reading::Atomic(Token::MacroStringLiteral(literal)) => Ok(literal.into()), + found => { + let err = Error::UnexpectedSyntax(UnexpectedSyntax { + expected: SyntaxKind::AnyStringLiteral, + found: found.into_token(), + }); + handler.receive(err.clone()); + Err(err) + } + } + } + /// Expects the next [`Token`] to be a [`Keyword`] of specific kind, and returns it. /// /// # Errors diff --git a/src/syntax/syntax_tree/condition.rs b/src/syntax/syntax_tree/condition.rs index 84632f8..ebdba7a 100644 --- a/src/syntax/syntax_tree/condition.rs +++ b/src/syntax/syntax_tree/condition.rs @@ -14,7 +14,7 @@ use crate::{ Handler, VoidHandler, }, lexical::{ - token::{Punctuation, StringLiteral, Token}, + token::{Punctuation, Token}, token_stream::Delimiter, }, syntax::{ @@ -23,6 +23,8 @@ use crate::{ }, }; +use super::AnyStringLiteral; + /// Condition that is viewed as a single entity during precedence parsing. /// /// Syntax Synopsis: @@ -31,7 +33,7 @@ use crate::{ /// PrimaryCondition: /// UnaryCondition /// | ParenthesizedCondition -/// | StringLiteral +/// | AnyStringLiteral /// ``` #[allow(missing_docs)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -39,7 +41,7 @@ use crate::{ pub enum PrimaryCondition { Unary(UnaryCondition), Parenthesized(ParenthesizedCondition), - StringLiteral(StringLiteral), + StringLiteral(AnyStringLiteral), } impl SourceElement for PrimaryCondition { @@ -354,7 +356,13 @@ impl<'a> Parser<'a> { // string literal Reading::Atomic(Token::StringLiteral(literal)) => { self.forward(); - Ok(PrimaryCondition::StringLiteral(literal)) + Ok(PrimaryCondition::StringLiteral(literal.into())) + } + + // macro string literal + Reading::Atomic(Token::MacroStringLiteral(literal)) => { + self.forward(); + Ok(PrimaryCondition::StringLiteral(literal.into())) } // parenthesized condition diff --git a/src/syntax/syntax_tree/expression.rs b/src/syntax/syntax_tree/expression.rs index 707dff7..cd1b52c 100644 --- a/src/syntax/syntax_tree/expression.rs +++ b/src/syntax/syntax_tree/expression.rs @@ -10,7 +10,9 @@ use crate::{ Handler, }, lexical::{ - token::{Identifier, Keyword, KeywordKind, Punctuation, StringLiteral, Token}, + token::{ + Identifier, Keyword, KeywordKind, MacroStringLiteral, Punctuation, StringLiteral, Token, + }, token_stream::Delimiter, }, syntax::{ @@ -52,6 +54,9 @@ impl SourceElement for Expression { /// ``` ebnf /// Primary: /// FunctionCall +/// | StringLiteral +/// | MacroStringLiteral +/// | LuaCode /// ``` #[allow(missing_docs)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -59,6 +64,7 @@ impl SourceElement for Expression { pub enum Primary { FunctionCall(FunctionCall), StringLiteral(StringLiteral), + MacroStringLiteral(MacroStringLiteral), Lua(Box), } @@ -67,6 +73,7 @@ impl SourceElement for Primary { match self { Self::FunctionCall(function_call) => function_call.span(), Self::StringLiteral(string_literal) => string_literal.span(), + Self::MacroStringLiteral(macro_string_literal) => macro_string_literal.span(), Self::Lua(lua_code) => lua_code.span(), } } @@ -180,6 +187,7 @@ impl<'a> Parser<'a> { /// # Errors /// - If the parser is not at a primary expression. /// - If the parser is not at a valid primary expression. + #[expect(clippy::too_many_lines)] pub fn parse_primary(&mut self, handler: &impl Handler) -> ParseResult { match self.stop_at_significant() { // identifier expression @@ -224,6 +232,14 @@ impl<'a> Parser<'a> { Ok(Primary::StringLiteral(literal)) } + // macro string literal expression + Reading::Atomic(Token::MacroStringLiteral(macro_string_literal)) => { + // eat the macro string literal + self.forward(); + + Ok(Primary::MacroStringLiteral(macro_string_literal)) + } + // lua code expression Reading::Atomic(Token::Keyword(lua_keyword)) if lua_keyword.keyword == KeywordKind::Lua => @@ -267,10 +283,11 @@ impl<'a> Parser<'a> { let combined = first .into_token() .and_then(|first| { - first.span().join(&last.into_token().map_or_else( - || first.span().to_owned(), - |last| last.span().to_owned(), - )) + first.span().join( + &last + .into_token() + .map_or_else(|| first.span(), |last| last.span()), + ) }) .expect("Invalid lua code span"); diff --git a/src/syntax/syntax_tree/mod.rs b/src/syntax/syntax_tree/mod.rs index df20696..bba3f3b 100644 --- a/src/syntax/syntax_tree/mod.rs +++ b/src/syntax/syntax_tree/mod.rs @@ -1,5 +1,6 @@ //! Contains the syntax tree nodes that represent the structure of the source code. +use derive_more::derive::From; use getset::Getters; use crate::{ @@ -9,7 +10,7 @@ use crate::{ Handler, VoidHandler, }, lexical::{ - token::{Punctuation, Token}, + token::{MacroStringLiteral, Punctuation, StringLiteral, Token}, token_stream::Delimiter, }, syntax::parser::Reading, @@ -64,6 +65,29 @@ pub struct DelimitedList { pub close: Punctuation, } +/// Represents a syntax tree node that can be either a string literal or a macro string literal. +/// +/// Syntax Synopsis: +/// ```ebnf +/// AnyStringLiteral: StringLiteral | MacroStringLiteral ; +/// ``` +#[allow(missing_docs)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)] +pub enum AnyStringLiteral { + StringLiteral(StringLiteral), + MacroStringLiteral(MacroStringLiteral), +} + +impl SourceElement for AnyStringLiteral { + fn span(&self) -> Span { + match self { + Self::StringLiteral(string_literal) => string_literal.span(), + Self::MacroStringLiteral(macro_string_literal) => macro_string_literal.span(), + } + } +} + impl<'a> Parser<'a> { /// Parses a list of elements enclosed by a pair of delimiters, separated by a separator. /// diff --git a/src/syntax/syntax_tree/statement/execute_block.rs b/src/syntax/syntax_tree/statement/execute_block.rs index 302ea5d..46ccffd 100644 --- a/src/syntax/syntax_tree/statement/execute_block.rs +++ b/src/syntax/syntax_tree/statement/execute_block.rs @@ -11,13 +11,13 @@ use crate::{ Handler, VoidHandler, }, lexical::{ - token::{Keyword, KeywordKind, Punctuation, StringLiteral, Token}, + token::{Keyword, KeywordKind, Punctuation, Token}, token_stream::Delimiter, }, syntax::{ error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}, parser::{DelimitedTree, Parser, Reading}, - syntax_tree::condition::ParenthesizedCondition, + syntax_tree::{condition::ParenthesizedCondition, AnyStringLiteral}, }, }; @@ -217,7 +217,7 @@ impl SourceElement for Else { /// /// ```ebnf /// As: -/// 'as' '(' StringLiteral ')' +/// 'as' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -231,7 +231,7 @@ pub struct As { open_paren: Punctuation, /// The selector of the as statement. #[get = "pub"] - as_selector: StringLiteral, + as_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -248,7 +248,7 @@ impl SourceElement for As { impl As { /// Dissolves the [`As`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.as_keyword, self.open_paren, @@ -263,7 +263,7 @@ impl As { /// Syntax Synopsis: /// ```ebnf /// Align: -/// 'align' '(' StringLiteral ')' +/// 'align' '(' AnyStringLiteral ')' /// ; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)] @@ -276,7 +276,7 @@ pub struct Align { open_paren: Punctuation, /// The selector of the align statement. #[get = "pub"] - align_selector: StringLiteral, + align_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -294,7 +294,7 @@ impl SourceElement for Align { impl Align { /// Dissolves the [`Align`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.align_keyword, self.open_paren, @@ -309,7 +309,7 @@ impl Align { /// Syntax Synopsis: /// ```ebnf /// Anchored: -/// 'anchored' '(' StringLiteral ')' +/// 'anchored' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -323,7 +323,7 @@ pub struct Anchored { open_paren: Punctuation, /// The selector of the anchored statement. #[get = "pub"] - anchored_selector: StringLiteral, + anchored_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -339,7 +339,7 @@ impl SourceElement for Anchored { impl Anchored { /// Dissolves the [`Anchored`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.anchored_keyword, self.open_paren, @@ -354,7 +354,7 @@ impl Anchored { /// Syntax Synopsis: /// ```ebnf /// AsAt: -/// 'asat' '(' StringLiteral ')' +/// 'asat' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -368,7 +368,7 @@ pub struct AsAt { open_paren: Punctuation, /// The selector of the asat statement. #[get = "pub"] - asat_selector: StringLiteral, + asat_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -384,7 +384,7 @@ impl SourceElement for AsAt { impl AsAt { /// Dissolves the [`AsAt`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.asat_keyword, self.open_paren, @@ -399,7 +399,7 @@ impl AsAt { /// Syntax Synopsis: /// ```ebnf /// At: -/// 'at' '(' StringLiteral ')' +/// 'at' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -413,7 +413,7 @@ pub struct At { open_paren: Punctuation, /// The selector of the at statement. #[get = "pub"] - at_selector: StringLiteral, + at_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -429,7 +429,7 @@ impl SourceElement for At { impl At { /// Dissolves the [`At`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.at_keyword, self.open_paren, @@ -444,7 +444,7 @@ impl At { /// Syntax Synopsis: /// ```ebnf /// Facing: -/// 'facing' '(' StringLiteral ')' +/// 'facing' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -458,7 +458,7 @@ pub struct Facing { open_paren: Punctuation, /// The selector of the facing statement. #[get = "pub"] - facing_selector: StringLiteral, + facing_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -474,7 +474,7 @@ impl SourceElement for Facing { impl Facing { /// Dissolves the [`Facing`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.facing_keyword, self.open_paren, @@ -489,7 +489,7 @@ impl Facing { /// Syntax Synopsis: /// ```ebnf /// In: -/// 'in' '(' StringLiteral ')' +/// 'in' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -503,7 +503,7 @@ pub struct In { open_paren: Punctuation, /// The selector of the in statement. #[get = "pub"] - in_selector: StringLiteral, + in_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -519,7 +519,7 @@ impl SourceElement for In { impl In { /// Dissolves the [`In`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.in_keyword, self.open_paren, @@ -534,7 +534,7 @@ impl In { /// Syntax Synopsis: /// ```ebnf /// On: -/// 'on' '(' StringLiteral ')' +/// 'on' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -548,7 +548,7 @@ pub struct On { open_paren: Punctuation, /// The selector of the on statement. #[get = "pub"] - on_selector: StringLiteral, + on_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -564,7 +564,7 @@ impl SourceElement for On { impl On { /// Dissolves the [`On`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.on_keyword, self.open_paren, @@ -579,7 +579,7 @@ impl On { /// Syntax Synopsis: /// ```ebnf /// Positioned: -/// 'positioned' '(' StringLiteral ')' +/// 'positioned' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -593,7 +593,7 @@ pub struct Positioned { open_paren: Punctuation, /// The selector of the positioned statement. #[get = "pub"] - positioned_selector: StringLiteral, + positioned_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -609,7 +609,7 @@ impl SourceElement for Positioned { impl Positioned { /// Dissolves the [`Positioned`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.positioned_keyword, self.open_paren, @@ -624,7 +624,7 @@ impl Positioned { /// Syntax Synopsis: /// ```ebnf /// Rotated: -/// 'rotated' '(' StringLiteral ')' +/// 'rotated' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -638,7 +638,7 @@ pub struct Rotated { open_paren: Punctuation, /// The selector of the rotated statement. #[get = "pub"] - rotated_selector: StringLiteral, + rotated_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -654,7 +654,7 @@ impl SourceElement for Rotated { impl Rotated { /// Dissolves the [`Rotated`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.rotated_keyword, self.open_paren, @@ -669,7 +669,7 @@ impl Rotated { /// Syntax Synopsis: /// ```ebnf /// Store: -/// 'store' '(' StringLiteral ')' +/// 'store' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -683,7 +683,7 @@ pub struct Store { open_paren: Punctuation, /// The selector of the store statement. #[get = "pub"] - store_selector: StringLiteral, + store_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -699,7 +699,7 @@ impl SourceElement for Store { impl Store { /// Dissolves the [`Store`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.store_keyword, self.open_paren, @@ -714,7 +714,7 @@ impl Store { /// Syntax Synopsis: /// ```ebnf /// Summon: -/// 'summon' '(' StringLiteral ')' +/// 'summon' '(' AnyStringLiteral ')' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -728,7 +728,7 @@ pub struct Summon { open_paren: Punctuation, /// The selector of the summon statement. #[get = "pub"] - summon_selector: StringLiteral, + summon_selector: AnyStringLiteral, /// The close parenthesis. #[get = "pub"] close_paren: Punctuation, @@ -744,7 +744,7 @@ impl SourceElement for Summon { impl Summon { /// Dissolves the [`Summon`] into its components. #[must_use] - pub fn dissolve(self) -> (Keyword, Punctuation, StringLiteral, Punctuation) { + pub fn dissolve(self) -> (Keyword, Punctuation, AnyStringLiteral, Punctuation) { ( self.summon_keyword, self.open_paren, @@ -825,7 +825,7 @@ impl<'a> Parser<'a> { let argument = match self.stop_at_significant() { Reading::IntoDelimited(punc) if punc.punctuation == '(' => self.step_into( Delimiter::Parenthesis, - |parser| parser.parse_string_literal(handler), + |parser| parser.parse_any_string_literal(handler), handler, ), unexpected => { @@ -896,7 +896,7 @@ impl<'a> Parser<'a> { fn head_from_keyword( keyword: Keyword, - argument: DelimitedTree, + argument: DelimitedTree, ) -> ParseResult { Ok(match keyword.keyword { KeywordKind::Align => Align { diff --git a/src/transpile/conversions.rs b/src/transpile/conversions.rs index 6478113..4b35420 100644 --- a/src/transpile/conversions.rs +++ b/src/transpile/conversions.rs @@ -1,10 +1,19 @@ //! Conversion functions for converting between tokens/ast-nodes and [`shulkerbox`] types -use shulkerbox::datapack::Condition as DpCondition; +use shulkerbox::{ + datapack::Condition as DpCondition, + util::{MacroString, MacroStringPart}, +}; -use crate::syntax::syntax_tree::condition::{ - BinaryCondition, Condition, ConditionalBinaryOperator, ConditionalPrefixOperator, - PrimaryCondition, +use crate::{ + lexical::token::{MacroStringLiteral, MacroStringLiteralPart}, + syntax::syntax_tree::{ + condition::{ + BinaryCondition, Condition, ConditionalBinaryOperator, ConditionalPrefixOperator, + PrimaryCondition, + }, + AnyStringLiteral, + }, }; impl From for DpCondition { @@ -19,9 +28,7 @@ impl From for DpCondition { impl From for DpCondition { fn from(value: PrimaryCondition) -> Self { match value { - PrimaryCondition::StringLiteral(literal) => { - Self::Atom(literal.str_content().to_string()) - } + PrimaryCondition::StringLiteral(literal) => Self::Atom(literal.into()), PrimaryCondition::Parenthesized(cond) => cond.dissolve().1.into(), PrimaryCondition::Unary(prefix) => match prefix.operator() { ConditionalPrefixOperator::LogicalNot(_) => { @@ -32,6 +39,54 @@ impl From for DpCondition { } } +impl From<&AnyStringLiteral> for MacroString { + fn from(value: &AnyStringLiteral) -> Self { + match value { + AnyStringLiteral::StringLiteral(literal) => Self::from(literal.str_content().as_ref()), + AnyStringLiteral::MacroStringLiteral(literal) => Self::from(literal), + } + } +} + +impl From for MacroString { + fn from(value: AnyStringLiteral) -> Self { + Self::from(&value) + } +} + +impl From<&MacroStringLiteral> for MacroString { + fn from(value: &MacroStringLiteral) -> Self { + if value + .parts() + .iter() + .any(|p| matches!(p, MacroStringLiteralPart::MacroUsage { .. })) + { + Self::MacroString( + value + .parts() + .iter() + .map(|part| match part { + MacroStringLiteralPart::Text(span) => { + MacroStringPart::String(span.str().to_string()) + } + MacroStringLiteralPart::MacroUsage { identifier, .. } => { + MacroStringPart::MacroUsage(identifier.span.str().to_string()) + } + }) + .collect(), + ) + } else { + Self::String(value.str_content()) + } + } +} + +impl From for MacroString { + fn from(value: MacroStringLiteral) -> Self { + Self::from(&value) + } +} + impl From for DpCondition { fn from(value: BinaryCondition) -> Self { let (lhs, op, rhs) = value.dissolve(); diff --git a/src/transpile/lua.rs b/src/transpile/lua.rs index 1673739..72c1215 100644 --- a/src/transpile/lua.rs +++ b/src/transpile/lua.rs @@ -59,9 +59,8 @@ mod enabled { err })?; - self.handle_lua_result(lua_result).map_err(|err| { + self.handle_lua_result(lua_result).inspect_err(|err| { handler.receive(err.clone()); - err }) } diff --git a/src/transpile/transpiler.rs b/src/transpile/transpiler.rs index 6292d55..a059ff9 100644 --- a/src/transpile/transpiler.rs +++ b/src/transpile/transpiler.rs @@ -382,6 +382,9 @@ impl Transpiler { Expression::Primary(Primary::StringLiteral(string)) => { Ok(Some(Command::Raw(string.str_content().to_string()))) } + Expression::Primary(Primary::MacroStringLiteral(string)) => { + Ok(Some(Command::UsesMacro(string.into()))) + } Expression::Primary(Primary::Lua(code)) => { Ok(code.eval_string(handler)?.map(Command::Raw)) } @@ -603,53 +606,53 @@ impl Transpiler { None } } - ExecuteBlockHead::As(as_) => { - let selector = as_.as_selector().str_content(); - tail.map(|tail| Execute::As(selector.to_string(), Box::new(tail))) + ExecuteBlockHead::As(r#as) => { + let selector = r#as.as_selector(); + tail.map(|tail| Execute::As(selector.into(), Box::new(tail))) } ExecuteBlockHead::At(at) => { - let selector = at.at_selector().str_content(); - tail.map(|tail| Execute::At(selector.to_string(), Box::new(tail))) + let selector = at.at_selector(); + tail.map(|tail| Execute::At(selector.into(), Box::new(tail))) } ExecuteBlockHead::Align(align) => { - let align = align.align_selector().str_content(); - tail.map(|tail| Execute::Align(align.to_string(), Box::new(tail))) + let align = align.align_selector(); + tail.map(|tail| Execute::Align(align.into(), Box::new(tail))) } ExecuteBlockHead::Anchored(anchored) => { - let anchor = anchored.anchored_selector().str_content(); - tail.map(|tail| Execute::Anchored(anchor.to_string(), Box::new(tail))) + let anchor = anchored.anchored_selector(); + tail.map(|tail| Execute::Anchored(anchor.into(), Box::new(tail))) } - ExecuteBlockHead::In(in_) => { - let dimension = in_.in_selector().str_content(); - tail.map(|tail| Execute::In(dimension.to_string(), Box::new(tail))) + ExecuteBlockHead::In(r#in) => { + let dimension = r#in.in_selector(); + tail.map(|tail| Execute::In(dimension.into(), Box::new(tail))) } ExecuteBlockHead::Positioned(positioned) => { - let position = positioned.positioned_selector().str_content(); - tail.map(|tail| Execute::Positioned(position.to_string(), Box::new(tail))) + let position = positioned.positioned_selector(); + tail.map(|tail| Execute::Positioned(position.into(), Box::new(tail))) } ExecuteBlockHead::Rotated(rotated) => { - let rotation = rotated.rotated_selector().str_content(); - tail.map(|tail| Execute::Rotated(rotation.to_string(), Box::new(tail))) + let rotation = rotated.rotated_selector(); + tail.map(|tail| Execute::Rotated(rotation.into(), Box::new(tail))) } ExecuteBlockHead::Facing(facing) => { - let facing = facing.facing_selector().str_content(); - tail.map(|tail| Execute::Facing(facing.to_string(), Box::new(tail))) + let facing = facing.facing_selector(); + tail.map(|tail| Execute::Facing(facing.into(), Box::new(tail))) } ExecuteBlockHead::AsAt(as_at) => { - let selector = as_at.asat_selector().str_content(); - tail.map(|tail| Execute::AsAt(selector.to_string(), Box::new(tail))) + let selector = as_at.asat_selector(); + tail.map(|tail| Execute::AsAt(selector.into(), Box::new(tail))) } ExecuteBlockHead::On(on) => { - let dimension = on.on_selector().str_content(); - tail.map(|tail| Execute::On(dimension.to_string(), Box::new(tail))) + let dimension = on.on_selector(); + tail.map(|tail| Execute::On(dimension.into(), Box::new(tail))) } ExecuteBlockHead::Store(store) => { - let store = store.store_selector().str_content(); - tail.map(|tail| Execute::Store(store.to_string(), Box::new(tail))) + let store = store.store_selector(); + tail.map(|tail| Execute::Store(store.into(), Box::new(tail))) } ExecuteBlockHead::Summon(summon) => { - let entity = summon.summon_selector().str_content(); - tail.map(|tail| Execute::Summon(entity.to_string(), Box::new(tail))) + let entity = summon.summon_selector(); + tail.map(|tail| Execute::Summon(entity.into(), Box::new(tail))) } }) } diff --git a/tests/transpiling/main.rs b/tests/transpiling/main.rs index e7fdece..9f2845a 100644 --- a/tests/transpiling/main.rs +++ b/tests/transpiling/main.rs @@ -28,9 +28,9 @@ fn transpile_test1() { main_fn.add_command(Command::Raw("say Hello, World!".to_string())); let exec_cmd = Command::Execute(Execute::As( - "@a".to_string(), + "@a".to_string().into(), Box::new(Execute::If( - Condition::Atom("entity @p[distance=..5]".to_string()), + Condition::Atom("entity @p[distance=..5]".to_string().into()), Box::new(Execute::Run(Box::new(Command::Raw( "say You are close to me!".to_string(), )))),