extend file provider with read_bytes method and return cow

This commit is contained in:
Moritz Hölting 2024-09-01 22:41:43 +02:00
parent 3332511290
commit 6e019fb3ac
5 changed files with 71 additions and 19 deletions

View File

@ -13,10 +13,11 @@ license = "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features] [features]
default = ["lua", "shulkerbox"] default = ["fs_access", "lua", "shulkerbox"]
fs_access = ["shulkerbox?/fs_access"]
lua = ["dep:mlua"]
serde = ["dep:serde", "shulkerbox?/serde"] serde = ["dep:serde", "shulkerbox?/serde"]
shulkerbox = ["dep:shulkerbox"] shulkerbox = ["dep:shulkerbox"]
lua = ["dep:mlua"]
[target.'cfg(target_arch = "wasm32")'.dependencies] [target.'cfg(target_arch = "wasm32")'.dependencies]
path-absolutize = { version = "3.1.1", features = ["use_unix_paths_on_wasm"] } path-absolutize = { version = "3.1.1", features = ["use_unix_paths_on_wasm"] }
@ -31,7 +32,7 @@ itertools = "0.13.0"
mlua = { version = "0.9.7", features = ["lua54", "vendored"], optional = true } mlua = { version = "0.9.7", features = ["lua54", "vendored"], optional = true }
path-absolutize = "3.1.1" path-absolutize = "3.1.1"
serde = { version = "1.0.197", features = ["derive", "rc"], optional = true } serde = { version = "1.0.197", features = ["derive", "rc"], optional = true }
shulkerbox = { git = "https://github.com/moritz-hoelting/shulkerbox", default-features = false, optional = true, rev = "a2d20dab8ea97bbd873edafb23afaad34292457f" } shulkerbox = { git = "https://github.com/moritz-hoelting/shulkerbox", default-features = false, optional = true, rev = "aff342a64a94981af942223345b5a5f105212957" }
strsim = "0.11.1" strsim = "0.11.1"
strum = { version = "0.26.2", features = ["derive"] } strum = { version = "0.26.2", features = ["derive"] }
strum_macros = "0.26.2" strum_macros = "0.26.2"

View File

@ -2,8 +2,10 @@
#[allow(missing_docs)] #[allow(missing_docs)]
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum Error { pub enum Error {
#[error("An error occurred while working with Input/Output.")] #[error("An error occurred while working with Input/Output: {0}")]
IoError(String), IoError(String),
#[error(transparent)]
Utf8Error(#[from] std::str::Utf8Error),
#[error("An error occurred while lexing the source code.")] #[error("An error occurred while lexing the source code.")]
LexicalError(#[from] crate::lexical::Error), LexicalError(#[from] crate::lexical::Error),
#[error("An error occured while tokenizing the source code.")] #[error("An error occured while tokenizing the source code.")]

View File

@ -1,15 +1,30 @@
use std::path::{Path, PathBuf}; use std::{
borrow::Cow,
path::{Path, PathBuf},
};
use super::Error; use super::Error;
/// A trait for providing file contents. /// A trait for providing file contents.
pub trait FileProvider { pub trait FileProvider {
/// Reads the contents of the file at the given path as bytes.
///
/// The result is a [`Cow`] so that a provider which already holds the
/// contents in memory can return a borrowed slice, while a provider that has
/// to load the data can return an owned buffer.
///
/// # Errors
/// - If an error occurs while reading the file.
/// - If the file does not exist.
fn read_bytes<P: AsRef<Path>>(&self, path: P) -> Result<Cow<[u8]>, Error>;
/// Reads the contents of the file at the given path. /// Reads the contents of the file at the given path.
/// ///
/// # Errors /// # Errors
/// - If an error occurs while reading the file. /// - If an error occurs while reading the file.
/// - If the file does not exist. /// - If the file does not exist.
fn read_to_string<P: AsRef<Path>>(&self, path: P) -> Result<String, Error>; /// - If the file is not valid UTF-8.
fn read_str<P: AsRef<Path>>(&self, path: P) -> Result<Cow<str>, Error> {
let bytes = self.read_bytes(path)?;
let string = std::str::from_utf8(&bytes)?.to_string();
Ok(Cow::Owned(string))
}
} }
/// Provides file contents from the file system. /// Provides file contents from the file system.
@ -37,28 +52,54 @@ where
} }
impl FileProvider for FsProvider { impl FileProvider for FsProvider {
fn read_to_string<P: AsRef<Path>>(&self, path: P) -> Result<String, Error> { fn read_bytes<P: AsRef<Path>>(&self, path: P) -> Result<Cow<[u8]>, Error> {
let full_path = self.root.join(path); let full_path = self.root.join(path);
std::fs::read_to_string(full_path).map_err(|err| Error::IoError(err.to_string())) std::fs::read(full_path)
.map(Cow::Owned)
.map_err(|err| Error::IoError(err.to_string()))
}
fn read_str<P: AsRef<Path>>(&self, path: P) -> Result<Cow<str>, Error> {
let full_path = self.root.join(path);
std::fs::read_to_string(full_path)
.map(Cow::Owned)
.map_err(|err| Error::IoError(err.to_string()))
} }
} }
#[cfg(feature = "shulkerbox")] #[cfg(feature = "shulkerbox")]
mod vfs { mod vfs {
use std::borrow::Cow;
use super::{Error, FileProvider, Path}; use super::{Error, FileProvider, Path};
use shulkerbox::virtual_fs::{VFile, VFolder}; use shulkerbox::virtual_fs::{VFile, VFolder};
impl FileProvider for VFolder { impl FileProvider for VFolder {
fn read_to_string<P: AsRef<Path>>(&self, path: P) -> Result<String, Error> { fn read_bytes<P: AsRef<Path>>(&self, path: P) -> Result<Cow<[u8]>, Error> {
normalize_path_str(path).map_or_else(
|| Err(Error::IoError("Invalid path".to_string())),
|path| {
self.get_file(&path)
.ok_or_else(|| Error::IoError("File not found".to_string()))
.map(|file| Cow::Borrowed(file.as_bytes()))
},
)
}
fn read_str<P: AsRef<Path>>(&self, path: P) -> Result<Cow<str>, Error> {
normalize_path_str(path).map_or_else( normalize_path_str(path).map_or_else(
|| Err(Error::IoError("Invalid path".to_string())), || Err(Error::IoError("Invalid path".to_string())),
|path| { |path| {
self.get_file(&path) self.get_file(&path)
.ok_or_else(|| Error::IoError("File not found".to_string())) .ok_or_else(|| Error::IoError("File not found".to_string()))
.and_then(|file| match file { .and_then(|file| match file {
VFile::Text(text) => Ok(text.to_owned()), VFile::Text(text) => Ok(Cow::Borrowed(text.as_str())),
VFile::Binary(bin) => String::from_utf8(bin.clone()) VFile::Binary(bin) => {
.map_err(|err| Error::IoError(err.to_string())), let string = std::str::from_utf8(bin)
.map_err(|err| Error::IoError(err.to_string()))?;
Ok(Cow::Borrowed(string))
}
}) })
}, },
) )
@ -112,13 +153,16 @@ mod vfs {
dir.add_file("foo.txt", VFile::Text("foo".to_string())); dir.add_file("foo.txt", VFile::Text("foo".to_string()));
dir.add_file("bar/baz.txt", VFile::Text("bar, baz".to_string())); dir.add_file("bar/baz.txt", VFile::Text("bar, baz".to_string()));
assert_eq!(dir.read_to_string("foo.txt").unwrap(), "foo".to_string());
assert_eq!( assert_eq!(
dir.read_to_string("bar/baz.txt").unwrap(), dir.read_str("foo.txt").unwrap().into_owned(),
"foo".to_string()
);
assert_eq!(
dir.read_str("bar/baz.txt").unwrap().into_owned(),
"bar, baz".to_string() "bar, baz".to_string()
); );
assert!(matches!( assert!(matches!(
dir.read_to_string("nonexistent.txt"), dir.read_str("nonexistent.txt"),
Err(Error::IoError(_)) Err(Error::IoError(_))
)); ));
} }

View File

@ -92,8 +92,12 @@ impl SourceFile {
identifier: String, identifier: String,
provider: &impl FileProvider, provider: &impl FileProvider,
) -> Result<Arc<Self>, Error> { ) -> Result<Arc<Self>, Error> {
let source = provider.read_to_string(path)?; let source = provider.read_str(path)?;
Ok(Self::new(path.to_path_buf(), identifier, source)) Ok(Self::new(
path.to_path_buf(),
identifier,
source.into_owned(),
))
} }
/// Get the [`Location`] of a given byte index /// Get the [`Location`] of a given byte index

View File

@ -51,10 +51,11 @@ impl MissingFunctionDeclaration {
let alternatives = functions let alternatives = functions
.iter() .iter()
.filter_map(|((program_identifier, function_name), data)| { .filter_map(|((program_identifier, function_name), data)| {
let normalized_distance = strsim::normalized_levenshtein(own_name, function_name); let normalized_distance =
strsim::normalized_damerau_levenshtein(own_name, function_name);
(program_identifier == own_program_identifier (program_identifier == own_program_identifier
&& (normalized_distance > 0.8 && (normalized_distance > 0.8
|| strsim::levenshtein(own_name, function_name) < 3)) || strsim::damerau_levenshtein(own_name, function_name) < 3))
.then_some((normalized_distance, data)) .then_some((normalized_distance, data))
}) })
.sorted_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)) .sorted_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal))