From 515453108390c93e03761087354f75dd8113dba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:33:55 +0100 Subject: [PATCH] unescape macro string contents --- CHANGELOG.md | 2 + src/lexical/token.rs | 12 +----- src/lib.rs | 1 + src/transpile/conversions.rs | 9 ++-- src/transpile/transpiler.rs | 2 +- src/transpile/util.rs | 39 +++-------------- src/util.rs | 82 ++++++++++++++++++++++++++++++++++++ 7 files changed, 96 insertions(+), 51 deletions(-) create mode 100644 src/util.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index f14d27e..d459384 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Macro strings +- Function parameters/arguments ### Changed diff --git a/src/lexical/token.rs b/src/lexical/token.rs index 9a0d9e1..0b200ef 100644 --- a/src/lexical/token.rs +++ b/src/lexical/token.rs @@ -299,17 +299,7 @@ impl MacroStringLiteral { for part in &self.parts { match part { MacroStringLiteralPart::Text(span) => { - let string = span.str(); - if string.contains('\\') { - content += &string - .replace("\\n", "\n") - .replace("\\r", "\r") - .replace("\\t", "\t") - .replace("\\\"", "\"") - .replace("\\\\", "\\"); - } else { - content += string; - } + content += &crate::util::unescape_macro_string(span.str()); } MacroStringLiteralPart::MacroUsage { identifier, .. } => { write!( diff --git a/src/lib.rs b/src/lib.rs index e951517..0ed9934 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ pub mod lexical; pub mod semantic; pub mod syntax; pub mod transpile; +pub mod util; use std::path::Path; diff --git a/src/transpile/conversions.rs b/src/transpile/conversions.rs index f7295e5..951e12a 100644 --- a/src/transpile/conversions.rs +++ b/src/transpile/conversions.rs @@ -66,13 +66,12 @@ impl From<&MacroStringLiteral> for MacroString { .parts() .iter() .map(|part| match part { - MacroStringLiteralPart::Text(span) => { - MacroStringPart::String(span.str().to_string()) - } + MacroStringLiteralPart::Text(span) => MacroStringPart::String( + crate::util::unescape_macro_string(span.str()).to_string(), + ), MacroStringLiteralPart::MacroUsage { identifier, .. } => { MacroStringPart::MacroUsage( - crate::transpile::util::identifier_to_macro(identifier.span.str()) - .to_string(), + super::util::identifier_to_macro(identifier.span.str()).to_string(), ) } }) diff --git a/src/transpile/transpiler.rs b/src/transpile/transpiler.rs index c4905d5..e57024f 100644 --- a/src/transpile/transpiler.rs +++ b/src/transpile/transpiler.rs @@ -548,7 +548,7 @@ impl Transpiler { format!( r#"{macro_name}:"{escaped}""#, macro_name = super::util::identifier_to_macro(ident), - escaped = super::util::escape_str(v) + escaped = crate::util::escape_str(v) ) }) .collect::>() diff --git a/src/transpile/util.rs b/src/transpile/util.rs index 8da2518..c600f25 100644 --- a/src/transpile/util.rs +++ b/src/transpile/util.rs @@ -40,30 +40,13 @@ where } } -/// Escapes `"` and `\` in a string. -#[must_use] -pub fn escape_str(s: &str) -> Cow { - if s.contains('"') || s.contains('\\') { - let mut escaped = String::with_capacity(s.len()); - for c in s.chars() { - match c { - '"' => escaped.push_str("\\\""), - '\\' => escaped.push_str("\\\\"), - _ => escaped.push(c), - } - } - Cow::Owned(escaped) - } else { - Cow::Borrowed(s) - } -} - /// Transforms an identifier to a macro name that only contains `a-zA-Z0-9_`. #[must_use] pub fn identifier_to_macro(ident: &str) -> Cow { - if ident - .chars() - .any(|c| !(c == '_' && c.is_ascii_alphanumeric())) + if ident.contains("__") + || ident + .chars() + .any(|c| !(c == '_' && c.is_ascii_alphanumeric())) { let new_ident = ident .chars() @@ -72,20 +55,8 @@ pub fn identifier_to_macro(ident: &str) -> Cow { let chksum = md5::hash(ident).to_hex_lowercase(); - Cow::Owned(new_ident + "_" + &chksum[..8]) + Cow::Owned(new_ident + "__" + &chksum[..8]) } else { Cow::Borrowed(ident) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_escape_str() { - assert_eq!(escape_str("Hello, world!"), "Hello, world!"); - assert_eq!(escape_str(r#"Hello, "world"!"#), r#"Hello, \"world\"!"#); - assert_eq!(escape_str(r"Hello, \world\!"), r"Hello, \\world\\!"); - } -} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..2926b79 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,82 @@ +//! Utility functions for the `Shulkerscript` language. + +use std::borrow::Cow; + +/// Escapes `"` and `\` in a string. +#[must_use] +pub fn escape_str(s: &str) -> Cow { + if s.contains('"') || s.contains('\\') { + let mut escaped = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '"' => escaped.push_str("\\\""), + '\\' => escaped.push_str("\\\\"), + _ => escaped.push(c), + } + } + Cow::Owned(escaped) + } else { + Cow::Borrowed(s) + } +} + +/// Unescapes '\`', `\`, `\n`, `\r` and `\t` in a string. +#[must_use] +pub fn unescape_macro_string(s: &str) -> Cow { + if s.contains('\\') || s.contains('`') { + Cow::Owned( + s.replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\`", "`") + .replace("\\\\", "\\"), + ) + } else { + Cow::Borrowed(s) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_escape_str() { + assert_eq!(escape_str("Hello, world!"), "Hello, world!"); + assert_eq!(escape_str(r#"Hello, "world"!"#), r#"Hello, \"world\"!"#); + assert_eq!(escape_str(r"Hello, \world\!"), r"Hello, \\world\\!"); + } + + #[test] + fn test_unescape_macro_string() { + assert_eq!(unescape_macro_string("Hello, world!"), "Hello, world!"); + assert_eq!( + unescape_macro_string(r#"Hello, "world"!"#), + r#"Hello, "world"!"# + ); + assert_eq!( + unescape_macro_string(r"Hello, \world\!"), + r"Hello, \world\!" + ); + assert_eq!( + unescape_macro_string(r"Hello, \nworld\!"), + "Hello, \nworld\\!" + ); + assert_eq!( + unescape_macro_string(r"Hello, \rworld\!"), + "Hello, \rworld\\!" + ); + assert_eq!( + unescape_macro_string(r"Hello, \tworld\!"), + "Hello, \tworld\\!" + ); + assert_eq!( + unescape_macro_string(r"Hello, \`world\!"), + r"Hello, `world\!" + ); + assert_eq!( + unescape_macro_string(r"Hello, \\world\!"), + r"Hello, \world\!" + ); + } +}