From 6e019fb3ac2e469d28a601dd1db7d6327d4cda53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Sun, 1 Sep 2024 22:41:43 +0200 Subject: [PATCH] extend file provider with read_bytes method and return cow --- Cargo.toml | 7 +++-- src/base/error.rs | 4 ++- src/base/file_provider.rs | 66 ++++++++++++++++++++++++++++++++------- src/base/source_file.rs | 8 +++-- src/transpile/error.rs | 5 +-- 5 files changed, 71 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 545c0fa..c6bfcc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,10 +13,11 @@ license = "MIT" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["lua", "shulkerbox"] +default = ["fs_access", "lua", "shulkerbox"] +fs_access = ["shulkerbox?/fs_access"] +lua = ["dep:mlua"] serde = ["dep:serde", "shulkerbox?/serde"] shulkerbox = ["dep:shulkerbox"] -lua = ["dep:mlua"] [target.'cfg(target_arch = "wasm32")'.dependencies] path-absolutize = { version = "3.1.1", features = ["use_unix_paths_on_wasm"] } @@ -31,7 +32,7 @@ itertools = "0.13.0" mlua = { version = "0.9.7", features = ["lua54", "vendored"], optional = true } path-absolutize = "3.1.1" serde = { version = "1.0.197", features = ["derive", "rc"], optional = true } -shulkerbox = { git = "https://github.com/moritz-hoelting/shulkerbox", default-features = false, optional = true, rev = "a2d20dab8ea97bbd873edafb23afaad34292457f" } +shulkerbox = { git = "https://github.com/moritz-hoelting/shulkerbox", default-features = false, optional = true, rev = "aff342a64a94981af942223345b5a5f105212957" } strsim = "0.11.1" strum = { version = "0.26.2", features = ["derive"] } strum_macros = "0.26.2" diff --git a/src/base/error.rs b/src/base/error.rs index b9051e5..6e46584 100644 --- a/src/base/error.rs +++ b/src/base/error.rs @@ -2,8 +2,10 @@ #[allow(missing_docs)] #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("An error occurred while working with Input/Output.")] + #[error("An error occurred while working with Input/Output: {0}")] IoError(String), + #[error(transparent)] + Utf8Error(#[from] std::str::Utf8Error), #[error("An error occurred while lexing the source code.")] LexicalError(#[from] crate::lexical::Error), #[error("An error occured while tokenizing the source code.")] diff --git a/src/base/file_provider.rs b/src/base/file_provider.rs index 037a710..f81ee53 100644 --- a/src/base/file_provider.rs +++ b/src/base/file_provider.rs @@ -1,15 +1,30 @@ -use std::path::{Path, PathBuf}; +use std::{ + borrow::Cow, + path::{Path, PathBuf}, +}; use super::Error; /// A trait for providing file contents. pub trait FileProvider { + /// Reads the contents of the file at the given path as bytes. + /// + /// # Errors + /// - If an error occurs while reading the file. + /// - If the file does not exist. + fn read_bytes>(&self, path: P) -> Result, Error>; + /// Reads the contents of the file at the given path. /// /// # Errors /// - If an error occurs while reading the file. /// - If the file does not exist. - fn read_to_string>(&self, path: P) -> Result; + /// - If the file is not valid UTF-8. + fn read_str>(&self, path: P) -> Result, Error> { + let bytes = self.read_bytes(path)?; + let string = std::str::from_utf8(&bytes)?.to_string(); + Ok(Cow::Owned(string)) + } } /// Provides file contents from the file system. @@ -37,28 +52,54 @@ where } impl FileProvider for FsProvider { - fn read_to_string>(&self, path: P) -> Result { + fn read_bytes>(&self, path: P) -> Result, Error> { let full_path = self.root.join(path); - std::fs::read_to_string(full_path).map_err(|err| Error::IoError(err.to_string())) + std::fs::read(full_path) + .map(Cow::Owned) + .map_err(|err| Error::IoError(err.to_string())) + } + + fn read_str>(&self, path: P) -> Result, Error> { + let full_path = self.root.join(path); + std::fs::read_to_string(full_path) + .map(Cow::Owned) + .map_err(|err| Error::IoError(err.to_string())) } } #[cfg(feature = "shulkerbox")] mod vfs { + use std::borrow::Cow; + use super::{Error, FileProvider, Path}; use shulkerbox::virtual_fs::{VFile, VFolder}; impl FileProvider for VFolder { - fn read_to_string>(&self, path: P) -> Result { + fn read_bytes>(&self, path: P) -> Result, Error> { + normalize_path_str(path).map_or_else( + || Err(Error::IoError("Invalid path".to_string())), + |path| { + self.get_file(&path) + .ok_or_else(|| Error::IoError("File not found".to_string())) + .map(|file| Cow::Borrowed(file.as_bytes())) + }, + ) + } + + fn read_str>(&self, path: P) -> Result, Error> { normalize_path_str(path).map_or_else( || Err(Error::IoError("Invalid path".to_string())), |path| { self.get_file(&path) .ok_or_else(|| Error::IoError("File not found".to_string())) .and_then(|file| match file { - VFile::Text(text) => Ok(text.to_owned()), - VFile::Binary(bin) => String::from_utf8(bin.clone()) - .map_err(|err| Error::IoError(err.to_string())), + VFile::Text(text) => Ok(Cow::Borrowed(text.as_str())), + VFile::Binary(bin) => { + let string = std::str::from_utf8(bin) + .map_err(|err| Error::IoError(err.to_string()))?; + + Ok(Cow::Borrowed(string)) + } }) }, ) @@ -112,13 +153,16 @@ mod vfs { dir.add_file("foo.txt", VFile::Text("foo".to_string())); dir.add_file("bar/baz.txt", VFile::Text("bar, baz".to_string())); - assert_eq!(dir.read_to_string("foo.txt").unwrap(), "foo".to_string()); assert_eq!( - dir.read_to_string("bar/baz.txt").unwrap(), + dir.read_str("foo.txt").unwrap().into_owned(), + "foo".to_string() + ); + assert_eq!( + dir.read_str("bar/baz.txt").unwrap().into_owned(), "bar, baz".to_string() ); assert!(matches!( - dir.read_to_string("nonexistent.txt"), + dir.read_str("nonexistent.txt"), Err(Error::IoError(_)) )); } diff --git a/src/base/source_file.rs b/src/base/source_file.rs index aec14c9..cfcd32f 100644 --- a/src/base/source_file.rs +++ b/src/base/source_file.rs @@ -92,8 +92,12 @@ impl SourceFile { identifier: String, provider: &impl FileProvider, ) -> Result, Error> { - let source = provider.read_to_string(path)?; - Ok(Self::new(path.to_path_buf(), identifier, source)) + let source = provider.read_str(path)?; + Ok(Self::new( + path.to_path_buf(), + identifier, + source.into_owned(), + )) } /// Get the [`Location`] of a given byte index diff --git a/src/transpile/error.rs b/src/transpile/error.rs index 2211359..b0c9fc9 100644 --- a/src/transpile/error.rs +++ b/src/transpile/error.rs @@ -51,10 +51,11 @@ impl MissingFunctionDeclaration { let alternatives = functions .iter() .filter_map(|((program_identifier, function_name), data)| { - let normalized_distance = strsim::normalized_levenshtein(own_name, function_name); + let normalized_distance = + strsim::normalized_damerau_levenshtein(own_name, function_name); (program_identifier == own_program_identifier && (normalized_distance > 0.8 - || strsim::levenshtein(own_name, function_name) < 3)) + || strsim::damerau_levenshtein(own_name, function_name) < 3)) .then_some((normalized_distance, data)) }) .sorted_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal))