From 183d3e85c65238bda0797179879aa0bf469cd383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Mon, 25 Aug 2025 19:12:20 +0200 Subject: [PATCH] implement new tokenizer & parser for TemplateStringLiteral --- grammar.md | 23 ++- src/base/source_file.rs | 61 +++++- src/lexical/token.rs | 293 +++++++++------------------ src/semantic/mod.rs | 53 ++--- src/syntax/error.rs | 6 +- src/syntax/parser.rs | 53 +---- src/syntax/syntax_tree/expression.rs | 193 +++++++++++++++++- src/syntax/syntax_tree/mod.rs | 4 +- src/transpile/expression.rs | 4 +- src/transpile/internal_functions.rs | 10 +- src/transpile/util.rs | 9 +- 11 files changed, 402 insertions(+), 307 deletions(-) diff --git a/grammar.md b/grammar.md index fd32a50..5d50e56 100644 --- a/grammar.md +++ b/grammar.md @@ -170,7 +170,7 @@ VariableDeclarationAssignment: ## AnyStringLiteral ```ebnf -AnyStringLiteral: StringLiteral | MacroStringLiteral ; +AnyStringLiteral: StringLiteral | TemplateStringLiteral ; ``` ## AnnotationValue @@ -230,6 +230,13 @@ Expression: Primary | Binary ; ``` +## TemplateStringLiteral + +```ebnf +TemplateStringLiteral: + '`' ( TemplateStringLiteralText | '$(' Expression ')' )* '`'; +``` + ## Else ```ebnf @@ -301,6 +308,13 @@ Primary: | LuaCode ``` +## TemplateStringLiteralText + +```ebnf +TemplateStringLiteralText: + TEXT ; +``` + ## Align ```ebnf @@ -465,13 +479,6 @@ Prefix: ; ``` -## TemplateStringLiteral - -```ebnf -TemplateStringLiteral: - '`' ( TEXT | '$(' Expression ')' )* '`'; -``` - ## AssignmentDestination ```ebnf diff --git a/src/base/source_file.rs b/src/base/source_file.rs index 3d2d6fd..35852d7 100644 --- a/src/base/source_file.rs +++ b/src/base/source_file.rs @@ -3,7 +3,7 @@ use std::{ cmp::Ordering, fmt::Debug, - iter::{Iterator, Peekable}, + iter::Iterator, ops::Range, path::{Path, PathBuf}, str::CharIndices, @@ -11,6 +11,7 @@ use std::{ }; use getset::{CopyGetters, Getters}; +use itertools::{structs::MultiPeek, Itertools as _}; use super::{file_provider::FileProvider, Error}; @@ -72,8 +73,9 @@ impl SourceFile { pub fn iter(self: &Arc) -> SourceIterator<'_> { SourceIterator { source_file: self, - iterator: self.content().char_indices().peekable(), + iterator: self.content().char_indices().multipeek(), prev: None, + in_template_string_expression_open_count: Vec::new(), } } @@ -337,16 +339,69 @@ pub struct SourceIterator<'a> { /// Get the source file that the iterator is iterating over. #[get_copy = "pub"] source_file: &'a Arc, - iterator: Peekable>, + iterator: MultiPeek>, /// Get the previous character that was iterated over. #[get_copy = "pub"] prev: Option<(usize, char)>, + /// Current state for parsing template strings. + in_template_string_expression_open_count: Vec, } impl SourceIterator<'_> { /// Peek at the next character in the source file. pub fn peek(&mut self) -> Option<(usize, char)> { + self.iterator.reset_peek(); self.iterator.peek().copied() } + + /// Peek at the next character in the source file. + pub fn multipeek(&mut self) -> Option<(usize, char)> { + self.iterator.peek().copied() + } + + /// Reset the multipeek state of the iterator. + pub fn reset_multipeek(&mut self) { + self.iterator.reset_peek(); + } + + /// Increase the count of open parentheses in the current template string expression. + pub fn increase_template_string_expression_open_paren_count(&mut self) { + if let Some(count) = self.in_template_string_expression_open_count.last_mut() { + *count += 1; + } + } + + /// Decrease the count of open parentheses in the current template string expression. + pub fn decrease_template_string_expression_open_paren_count(&mut self) { + if let Some(count) = self.in_template_string_expression_open_count.last_mut() { + *count = count.saturating_sub(1); + } + } + + /// Enter a template string expression. + pub fn enter_template_string(&mut self) { + self.in_template_string_expression_open_count.push(0); + } + + /// Exit a template string expression. + pub fn exit_template_string(&mut self) { + self.in_template_string_expression_open_count.pop(); + } + + /// Check if the iterator is currently in a template string expression. + #[must_use] + pub fn is_in_template_string_expression(&self) -> Option { + self.in_template_string_expression_open_count + .last() + .map(|&count| count > 0) + } + + /// Get the number of open parentheses in the current template string expression. + #[must_use] + pub fn template_string_expression_open_paren_count(&self) -> Option { + self.in_template_string_expression_open_count + .last() + .copied() + } } impl Iterator for SourceIterator<'_> { type Item = (usize, char); diff --git a/src/lexical/token.rs b/src/lexical/token.rs index 49fbbe2..fb0296b 100644 --- a/src/lexical/token.rs +++ b/src/lexical/token.rs @@ -8,14 +8,11 @@ use std::{ sync::OnceLock, }; -use crate::{ - base::{ - self, - log::SourceCodeDisplay, - source_file::{SourceElement, SourceIterator, Span}, - Handler, - }, - syntax::syntax_tree::expression::{Expression, Primary}, +use crate::base::{ + self, + log::SourceCodeDisplay, + source_file::{SourceElement, SourceIterator, Span}, + Handler, }; use derive_more::From; use enum_as_inner::EnumAsInner; @@ -168,7 +165,7 @@ pub enum Token { DocComment(DocComment), CommandLiteral(CommandLiteral), StringLiteral(StringLiteral), - TemplateStringLiteral(Box), + TemplateStringText(TemplateStringLiteralText), } impl SourceElement for Token { @@ -184,7 +181,7 @@ impl SourceElement for Token { Self::DocComment(token) => token.span(), Self::CommandLiteral(token) => token.span(), Self::StringLiteral(token) => token.span(), - Self::TemplateStringLiteral(token) => token.span(), + Self::TemplateStringText(token) => token.span(), } } } @@ -355,79 +352,25 @@ impl SourceElement for StringLiteral { } } -/// Represents a hardcoded template string literal value in the source code. +/// Represents a hardcoded template string text value in the source code. /// /// ```ebnf -/// TemplateStringLiteral: -/// '`' ( TEXT | '$(' Expression ')' )* '`'; +/// TemplateStringLiteralText: +/// TEXT ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct TemplateStringLiteral { - /// The backtick that starts the template string literal. - starting_backtick: Punctuation, - /// The parts that make up the template string literal. - parts: Vec, - /// The backtick that ends the template string literal. - ending_backtick: Punctuation, +pub struct TemplateStringLiteralText { + /// Is the span that makes up the token. + pub span: Span, } -impl TemplateStringLiteral { - /// Returns the string content without escapement characters, leading and trailing double quotes. - #[must_use] - pub fn str_content(&self) -> String { - let mut content = String::new(); - - for part in &self.parts { - match part { - TemplateStringLiteralPart::Text(span) => { - content += &crate::util::unescape_macro_string(span.str()); - } - TemplateStringLiteralPart::Expression { expression, .. } => { - // write!( - // content, - // "$({})", - // crate::util::identifier_to_macro(identifier.span.str()) - // ) - // .expect("can always write to string"); - todo!("handle expression in template string literal") - } - } - } - - content - } - - /// Returns the parts that make up the template string literal. - #[must_use] - pub fn parts(&self) -> &[TemplateStringLiteralPart] { - &self.parts - } -} - -impl SourceElement for TemplateStringLiteral { +impl SourceElement for TemplateStringLiteralText { fn span(&self) -> Span { - self.starting_backtick - .span - .join(&self.ending_backtick.span) - .expect("Invalid template string literal span") + self.span.clone() } } -/// Represents a part of a template string literal value in the source code. -#[allow(missing_docs)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum TemplateStringLiteralPart { - Text(Span), - Expression { - dollar: Punctuation, - open_brace: Punctuation, - expression: Expression, - close_brace: Punctuation, - }, -} - /// Is an enumeration representing the two kinds of comments in the Shulkerscript programming language. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -618,26 +561,6 @@ impl Token { ) } - /// Creates a span from the given start location to the current location of the iterator with the given offset. - #[must_use] - fn create_span_with_end_offset( - start: usize, - iter: &mut SourceIterator, - end_offset: isize, - ) -> Span { - iter.peek().map_or_else( - || Span::to_end_with_offset(iter.source_file().clone(), start, end_offset).unwrap(), - |(index, _)| { - Span::new( - iter.source_file().clone(), - start, - index.saturating_add_signed(end_offset), - ) - .unwrap() - }, - ) - } - /// Checks if the given character is a valid first character of an identifier. fn is_first_identifier_character(character: char) -> bool { character == '_' @@ -794,113 +717,78 @@ impl Token { .into() } - /// Handles a sequence of characters that are enclosed in backticks and contain expressions - fn handle_template_string_literal( - iter: &mut SourceIterator, - mut start: usize, - ) -> Result { - let mut is_escaped = false; - let mut is_inside_expression = false; - let mut encountered_open_parenthesis = false; - let starting_backtick = Punctuation { - span: Self::create_span(start, iter), - punctuation: '`', - }; - start += 1; - let mut parts = Vec::new(); - - while iter.peek().is_some() { - let (index, character) = iter.next().unwrap(); - - #[expect(clippy::collapsible_else_if)] - if is_inside_expression { - if character == ')' { - // Check if the template usage is empty - if start + 2 == index { - return Err(UnclosedExpressionInTemplateUsage { - span: Span::new(iter.source_file().clone(), start, index + 1).unwrap(), - } - .into()); - } - parts.push(TemplateStringLiteralPart::Expression { - dollar: Punctuation { - span: Span::new(iter.source_file().clone(), start, start + 1).unwrap(), - punctuation: '$', - }, - open_brace: Punctuation { - span: Span::new(iter.source_file().clone(), start + 1, start + 2) - .unwrap(), - punctuation: '(', - }, - expression: { - // TODO: correctly parse expression - Expression::Primary(Primary::Identifier(Identifier { - span: Self::create_span_with_end_offset(start + 2, iter, -1), - })) - }, - close_brace: Punctuation { - span: Span::new(iter.source_file().clone(), index, index + 1).unwrap(), - punctuation: ')', - }, - }); - start = index + 1; - is_inside_expression = false; - } else if !encountered_open_parenthesis && character == '(' { - encountered_open_parenthesis = true; - } else if encountered_open_parenthesis && !Self::is_identifier_character(character) - { - if character == '`' { - return Err(UnclosedExpressionInTemplateUsage { - span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(), - } - .into()); - } - - Self::walk_iter(iter, |c| c != ')' && !Self::is_identifier_character(c)); - return Err(InvalidMacroNameCharacter { - span: Self::create_span(index, iter), - } - .into()); - } - } else { - if character == '$' && iter.peek().is_some_and(|(_, c)| c == '(') { - parts.push(TemplateStringLiteralPart::Text( - Self::create_span_with_end_offset(start, iter, -1), - )); - start = index; - is_inside_expression = true; - encountered_open_parenthesis = false; - } else if character == '\\' { - is_escaped = !is_escaped; - } else if character == '`' && !is_escaped { - if start != index { - parts.push(TemplateStringLiteralPart::Text( - Self::create_span_with_end_offset(start, iter, -1), - )); - } - start = index; - break; - } else { - is_escaped = false; - } - } + /// Handles a backticks for opening and closing template strings + fn handle_template_string_quotes(iter: &mut SourceIterator, start: usize) -> Self { + if iter + .is_in_template_string_expression() + .is_some_and(|last| !last) + { + // in template string text + iter.exit_template_string(); + } else { + // outside template string or in expression + iter.enter_template_string(); } - if is_inside_expression { - Err(UnclosedExpressionInTemplateUsage { - span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(), + Punctuation { + span: Self::create_span(start, iter), + punctuation: '`', + } + .into() + } + + fn handle_template_string_inner( + iter: &mut SourceIterator, + start: usize, + character: char, + prev_token: Option<&Self>, + ) -> Self { + if character == '$' && iter.peek().is_some_and(|(_, c)| c == '(') { + // starts immediately with expression, return punctuation + return Punctuation { + span: Self::create_span(start, iter), + punctuation: '$', } - .into()) - } else { - Ok(Box::new(TemplateStringLiteral { - starting_backtick, - parts, - ending_backtick: Punctuation { + .into(); + } + + match (character, prev_token) { + ('(', Some(Self::Punctuation(punc))) if punc.punctuation == '$' => { + // Found expression opening parenthesis + iter.increase_template_string_expression_open_paren_count(); + + return Punctuation { span: Self::create_span(start, iter), - punctuation: '`', - }, - }) - .into()) + punctuation: '(', + } + .into(); + } + _ => {} + } + + loop { + if character != '`' { + iter.reset_multipeek(); + Self::walk_iter(iter, |c| c != '$' && c != '`'); + } + + iter.reset_multipeek(); + let first_peek_none_or_backtick = iter.multipeek().map(|(_, c)| c); + let second_peek_open_paren = iter.multipeek().is_some_and(|(_, c)| c == '('); + + if character == '`' + || first_peek_none_or_backtick.is_none_or(|c| c == '`') + || second_peek_open_paren + { + // Found expression start, end of text + + break TemplateStringLiteralText { + span: Self::create_span(start, iter), + } + .into(); + } + + iter.next(); } } @@ -934,8 +822,13 @@ impl Token { .next() .ok_or(TokenizeError::EndOfSourceCodeIteratorArgument)?; + if iter.is_in_template_string_expression().is_some_and(|b| !b) && character != '`' { + Ok(Self::handle_template_string_inner( + iter, start, character, prev_token, + )) + } // Found white spaces - if character.is_whitespace() { + else if character.is_whitespace() { Ok(Self::handle_whitespace(iter, start)) } // Found identifier/keyword @@ -952,7 +845,7 @@ impl Token { } // Found macro string literal else if character == '`' { - Self::handle_template_string_literal(iter, start) + Ok(Self::handle_template_string_quotes(iter, start)) } // Found integer literal else if character.is_ascii_digit() { @@ -960,6 +853,12 @@ impl Token { } // Found a punctuation else if character.is_ascii_punctuation() { + if character == '(' { + iter.increase_template_string_expression_open_paren_count(); + } else if character == ')' { + iter.decrease_template_string_expression_open_paren_count(); + } + Ok(Punctuation { span: Self::create_span(start, iter), punctuation: character, diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index 3863f40..401ea86 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -4,10 +4,13 @@ use crate::{ base::{self, source_file::SourceElement as _, Handler}, - lexical::token::{KeywordKind, TemplateStringLiteral, TemplateStringLiteralPart}, + lexical::token::KeywordKind, syntax::syntax_tree::{ declaration::{Declaration, Function, FunctionVariableType, ImportItems}, - expression::{Binary, BinaryOperator, Expression, LuaCode, PrefixOperator, Primary}, + expression::{ + Binary, BinaryOperator, Expression, LuaCode, PrefixOperator, Primary, + TemplateStringLiteral, TemplateStringLiteralPart, + }, program::{Namespace, ProgramFile}, statement::{ execute_block::{ @@ -963,29 +966,33 @@ impl TemplateStringLiteral { for part in self.parts() { match part { - TemplateStringLiteralPart::Expression { expression, .. } => match expression { - Expression::Primary(Primary::Identifier(identifier)) => { - if let Some(variable_type) = scope.get_variable(identifier.span.str()) { - // TODO: correct checks - // if variable_type != VariableType::MacroParameter { - // let err = error::Error::UnexpectedExpression(UnexpectedExpression( - // Box::new(Expression::Primary(Primary::Identifier( - // identifier.clone(), - // ))), - // )); - // handler.receive(err.clone()); - // errs.push(err); - // } - } else { - let err = error::Error::UnknownIdentifier(UnknownIdentifier { - identifier: identifier.span.clone(), - }); - handler.receive(err.clone()); - errs.push(err); + TemplateStringLiteralPart::Expression { expression, .. } => { + match expression.as_ref() { + Expression::Primary(Primary::Identifier(identifier)) => { + if let Some(variable_type) = scope.get_variable(identifier.span.str()) { + // TODO: correct checks + // if variable_type != VariableType::MacroParameter { + // let err = error::Error::UnexpectedExpression(UnexpectedExpression( + // Box::new(Expression::Primary(Primary::Identifier( + // identifier.clone(), + // ))), + // )); + // handler.receive(err.clone()); + // errs.push(err); + // } + } else { + let err = error::Error::UnknownIdentifier(UnknownIdentifier { + identifier: identifier.span.clone(), + }); + handler.receive(err.clone()); + errs.push(err); + } + } + _ => { + // TODO: handle other expressions in template string literals } } - _ => todo!("handle other expressions in template string literals"), - }, + } TemplateStringLiteralPart::Text(_) => {} } } diff --git a/src/syntax/error.rs b/src/syntax/error.rs index b1eadff..0b144f9 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -37,7 +37,7 @@ pub enum SyntaxKind { Integer, Boolean, StringLiteral, - TemplateStringLiteral, + TemplateStringLiteralPart, AnyStringLiteral, Statement, Expression, @@ -76,7 +76,7 @@ impl SyntaxKind { Self::Integer => "an integer token".to_string(), Self::Boolean => "a boolean token".to_string(), Self::StringLiteral => "a string literal".to_string(), - Self::TemplateStringLiteral => "a template string literal".to_string(), + Self::TemplateStringLiteralPart => "part of a template string literal".to_string(), Self::AnyStringLiteral => "a (template) string literal".to_string(), Self::Statement => "a statement syntax".to_string(), Self::Expression => "an expression syntax".to_string(), @@ -116,7 +116,7 @@ impl Display for UnexpectedSyntax { Some(Token::Boolean(..)) => "a boolean token".to_string(), Some(Token::CommandLiteral(..)) => "a literal command token".to_string(), Some(Token::StringLiteral(..)) => "a string literal token".to_string(), - Some(Token::TemplateStringLiteral(..)) => "a template string literal token".to_string(), + Some(Token::TemplateStringText(..)) => "a template string text token".to_string(), None => "EOF".to_string(), }; diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 0ded23d..4701067 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -6,18 +6,12 @@ use enum_as_inner::EnumAsInner; use crate::{ base::{self, Handler}, lexical::{ - token::{ - Identifier, Integer, Keyword, KeywordKind, Punctuation, StringLiteral, - TemplateStringLiteral, Token, - }, + token::{Identifier, Integer, Keyword, KeywordKind, Punctuation, StringLiteral, Token}, token_stream::{Delimited, Delimiter, TokenStream, TokenTree}, }, }; -use super::{ - error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}, - syntax_tree::AnyStringLiteral, -}; +use super::error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}; /// Represents a parser that reads a token stream and constructs an abstract syntax tree. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut)] @@ -438,49 +432,6 @@ impl Frame<'_> { } } - /// Expects the next [`Token`] to be an [`TemplateStringLiteral`], and returns it. - /// - /// # Errors - /// If the next [`Token`] is not an [`TemplateStringLiteral`]. - pub fn parse_template_string_literal( - &mut self, - handler: &impl Handler, - ) -> ParseResult { - match self.next_significant_token() { - Reading::Atomic(Token::TemplateStringLiteral(literal)) => Ok(*literal), - found => { - let err = Error::UnexpectedSyntax(UnexpectedSyntax { - expected: SyntaxKind::TemplateStringLiteral, - found: found.into_token(), - }); - handler.receive(Box::new(err.clone())); - Err(err) - } - } - } - - /// Expects the next [`Token`] to be an [`AnyStringLiteral`], and returns it. - /// - /// # Errors - /// If the next [`Token`] is not an [`AnyStringLiteral`]. - pub fn parse_any_string_literal( - &mut self, - handler: &impl Handler, - ) -> ParseResult { - match self.next_significant_token() { - Reading::Atomic(Token::StringLiteral(literal)) => Ok(literal.into()), - Reading::Atomic(Token::TemplateStringLiteral(literal)) => Ok((*literal).into()), - found => { - let err = Error::UnexpectedSyntax(UnexpectedSyntax { - expected: SyntaxKind::AnyStringLiteral, - found: found.into_token(), - }); - handler.receive(Box::new(err.clone())); - Err(err) - } - } - } - /// Expects the next [`Token`] to be a [`Keyword`] of specific kind, and returns it. /// /// # Errors diff --git a/src/syntax/syntax_tree/expression.rs b/src/syntax/syntax_tree/expression.rs index d30f044..02c8137 100644 --- a/src/syntax/syntax_tree/expression.rs +++ b/src/syntax/syntax_tree/expression.rs @@ -14,14 +14,15 @@ use crate::{ lexical::{ token::{ Boolean, Identifier, Integer, Keyword, KeywordKind, Punctuation, StringLiteral, - TemplateStringLiteral, Token, + TemplateStringLiteralText, Token, }, token_stream::Delimiter, }, syntax::{ self, - error::{Error, ParseResult, UnexpectedSyntax}, + error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}, parser::{Parser, Reading}, + syntax_tree::AnyStringLiteral, }, }; @@ -391,6 +392,95 @@ impl SourceElement for FunctionCall { } } +/// Represents a hardcoded template string literal value in the source code. +/// +/// ```ebnf +/// TemplateStringLiteral: +/// '`' ( TemplateStringLiteralText | '$(' Expression ')' )* '`'; +/// ``` +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TemplateStringLiteral { + /// The backtick that starts the template string literal. + pub(crate) starting_backtick: Punctuation, + /// The parts that make up the template string literal. + pub(crate) parts: Vec, + /// The backtick that ends the template string literal. + pub(crate) ending_backtick: Punctuation, +} + +impl TemplateStringLiteral { + /// Returns the string content without escapement characters, leading and trailing double quotes. + #[must_use] + pub fn str_content(&self) -> String { + let mut content = String::new(); + + for part in &self.parts { + match part { + TemplateStringLiteralPart::Text(text) => { + content += &crate::util::unescape_macro_string(text.span.str()); + } + TemplateStringLiteralPart::Expression { expression, .. } => { + // write!( + // content, + // "$({})", + // crate::util::identifier_to_macro(identifier.span.str()) + // ) + // .expect("can always write to string"); + todo!("handle expression in template string literal") + } + } + } + + content + } + + /// Returns the parts that make up the template string literal. + #[must_use] + pub fn parts(&self) -> &[TemplateStringLiteralPart] { + &self.parts + } +} + +impl SourceElement for TemplateStringLiteral { + fn span(&self) -> Span { + self.starting_backtick + .span + .join(&self.ending_backtick.span) + .expect("Invalid template string literal span") + } +} + +/// Represents a part of a template string literal value in the source code. +#[allow(missing_docs)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TemplateStringLiteralPart { + Text(TemplateStringLiteralText), + Expression { + dollar: Punctuation, + open_brace: Punctuation, + expression: Box, + close_brace: Punctuation, + }, +} + +impl SourceElement for TemplateStringLiteralPart { + fn span(&self) -> Span { + match self { + Self::Text(text) => text.span(), + Self::Expression { + dollar, + close_brace, + .. + } => dollar + .span() + .join(&close_brace.span()) + .expect("Invalid template string literal part span"), + } + } +} + /// Represents a lua code block in the syntax tree. /// /// Syntax Synopsis: @@ -589,6 +679,13 @@ impl Parser<'_> { })) } + // template string literal + Reading::Atomic(Token::Punctuation(punc)) if punc.punctuation == '`' => { + let template_string_literal = self.parse_template_string_literal(handler)?; + + Ok(Primary::TemplateStringLiteral(template_string_literal)) + } + // parenthesized expression Reading::IntoDelimited(left_parenthesis) if left_parenthesis.punctuation == '(' => self .parse_parenthesized(handler) @@ -661,14 +758,6 @@ impl Parser<'_> { Ok(Primary::StringLiteral(literal)) } - // template string literal expression - Reading::Atomic(Token::TemplateStringLiteral(template_string_literal)) => { - // eat the template string literal - self.forward(); - - Ok(Primary::TemplateStringLiteral(*template_string_literal)) - } - // lua code expression Reading::Atomic(Token::Keyword(lua_keyword)) if lua_keyword.keyword == KeywordKind::Lua => @@ -843,4 +932,88 @@ impl Parser<'_> { } } } + + /// Expects the next [`Token`] to be an [`TemplateStringLiteral`], and returns it. + /// + /// # Errors + /// If the next [`Token`] is not an [`TemplateStringLiteral`]. + pub fn parse_template_string_literal( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + let starting_backtick = self.parse_punctuation('`', true, handler)?; + + let mut parts = Vec::new(); + + loop { + match self.stop_at_significant() { + Reading::Atomic(Token::Punctuation(ending_backtick)) + if ending_backtick.punctuation == '`' => + { + self.forward(); + + // closing tick + return Ok(TemplateStringLiteral { + starting_backtick, + parts, + ending_backtick, + }); + } + Reading::Atomic(Token::Punctuation(dollar)) if dollar.punctuation == '$' => { + self.forward(); + + let delimited_expression = self.step_into( + Delimiter::Parenthesis, + |parser| parser.parse_expression(handler), + handler, + )?; + + parts.push(TemplateStringLiteralPart::Expression { + dollar, + open_brace: delimited_expression.open, + expression: Box::new(delimited_expression.tree?), + close_brace: delimited_expression.close, + }); + } + + Reading::Atomic(Token::TemplateStringText(text)) => { + self.forward(); + parts.push(TemplateStringLiteralPart::Text(text)); + } + + unexpected => { + let err = Error::UnexpectedSyntax(UnexpectedSyntax { + expected: syntax::error::SyntaxKind::TemplateStringLiteralPart, + found: unexpected.into_token(), + }); + handler.receive(Box::new(err.clone())); + return Err(err); + } + } + } + } + + /// Expects the next [`Token`] to be an [`AnyStringLiteral`], and returns it. + /// + /// # Errors + /// If the next [`Token`] is not an [`AnyStringLiteral`]. + pub fn parse_any_string_literal( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + match self.next_significant_token() { + Reading::Atomic(Token::StringLiteral(literal)) => Ok(literal.into()), + Reading::Atomic(Token::Punctuation(punc)) if punc.punctuation == '`' => self + .parse_template_string_literal(handler) + .map(AnyStringLiteral::TemplateStringLiteral), + found => { + let err = Error::UnexpectedSyntax(UnexpectedSyntax { + expected: SyntaxKind::AnyStringLiteral, + found: found.into_token(), + }); + handler.receive(Box::new(err.clone())); + Err(err) + } + } + } } diff --git a/src/syntax/syntax_tree/mod.rs b/src/syntax/syntax_tree/mod.rs index 57930e3..3a3f3fd 100644 --- a/src/syntax/syntax_tree/mod.rs +++ b/src/syntax/syntax_tree/mod.rs @@ -12,10 +12,10 @@ use crate::{ Handler, VoidHandler, }, lexical::{ - token::{Identifier, Punctuation, StringLiteral, TemplateStringLiteral, Token}, + token::{Identifier, Punctuation, StringLiteral, Token}, token_stream::Delimiter, }, - syntax::parser::Reading, + syntax::{parser::Reading, syntax_tree::expression::TemplateStringLiteral}, }; use super::{ diff --git a/src/transpile/expression.rs b/src/transpile/expression.rs index 02457d4..92ee758 100644 --- a/src/transpile/expression.rs +++ b/src/transpile/expression.rs @@ -20,7 +20,7 @@ use super::{ #[cfg(feature = "shulkerbox")] use crate::{ base::{self, source_file::SourceElement, Handler, VoidHandler}, - lexical::token::{Identifier, StringLiteral, TemplateStringLiteralPart}, + lexical::token::{Identifier, StringLiteral}, syntax::syntax_tree::expression::{ Binary, BinaryOperator, Expression, Indexed, MemberAccess, Parenthesized, PrefixOperator, Primary, @@ -413,6 +413,8 @@ impl Primary { }) .and_then(|val| val), Self::TemplateStringLiteral(template_string_literal) => { + use crate::syntax::syntax_tree::expression::TemplateStringLiteralPart; + if template_string_literal .parts() .iter() diff --git a/src/transpile/internal_functions.rs b/src/transpile/internal_functions.rs index fa719f6..5962ff1 100644 --- a/src/transpile/internal_functions.rs +++ b/src/transpile/internal_functions.rs @@ -12,9 +12,11 @@ use serde_json::{json, Value as JsonValue}; use crate::{ base::{source_file::SourceElement as _, VoidHandler}, - lexical::token::{Identifier, TemplateStringLiteralPart}, + lexical::token::Identifier, semantic::error::{InvalidFunctionArguments, UnexpectedExpression}, - syntax::syntax_tree::expression::{Expression, FunctionCall, Primary}, + syntax::syntax_tree::expression::{ + Expression, FunctionCall, Primary, TemplateStringLiteralPart, + }, transpile::{ error::{IllegalIndexing, IllegalIndexingReason, UnknownIdentifier}, expression::{ComptimeValue, DataLocation, ExpectedType, StorageType}, @@ -349,10 +351,10 @@ fn print_function( for part in template_string.parts() { match part { TemplateStringLiteralPart::Text(text) => { - parts.push(JsonValue::String(text.str().to_string())); + parts.push(JsonValue::String(text.span.str().to_string())); } TemplateStringLiteralPart::Expression { expression, .. } => { - match expression { + match expression.as_ref() { Expression::Primary(Primary::Identifier(identifier)) => { let (cur_contains_macro, cur_cmds, part) = get_identifier_part(identifier, transpiler, scope)?; diff --git a/src/transpile/util.rs b/src/transpile/util.rs index a4c1eca..46af0cf 100644 --- a/src/transpile/util.rs +++ b/src/transpile/util.rs @@ -4,9 +4,8 @@ use std::{fmt::Display, str::FromStr, sync::Arc}; use crate::{ base::{self, source_file::SourceElement as _, Handler}, - lexical::token::{TemplateStringLiteral, TemplateStringLiteralPart}, syntax::syntax_tree::{ - expression::{Expression, Primary}, + expression::{Expression, Primary, TemplateStringLiteral, TemplateStringLiteralPart}, AnyStringLiteral, }, transpile::{ @@ -291,11 +290,11 @@ impl TemplateStringLiteral { self.parts() .iter() .map(|part| match part { - TemplateStringLiteralPart::Text(span) => Ok(MacroStringPart::String( - crate::util::unescape_macro_string(span.str()).to_string(), + TemplateStringLiteralPart::Text(text) => Ok(MacroStringPart::String( + crate::util::unescape_macro_string(text.span.str()).to_string(), )), TemplateStringLiteralPart::Expression { expression, .. } => { - match expression { + match expression.as_ref() { Expression::Primary(Primary::Identifier(identifier)) => { #[expect(clippy::option_if_let_else)]