Skip to content

Commit

Permalink
perf(compile): read embedded files as static references when UTF-8 an…
Browse files Browse the repository at this point in the history
…d reading as strings (#27033)
  • Loading branch information
dsherret authored and bartlomieju committed Nov 28, 2024
1 parent 27757e8 commit fdba58c
Show file tree
Hide file tree
Showing 19 changed files with 158 additions and 61 deletions.
2 changes: 2 additions & 0 deletions cli/args/deno_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ impl<'a> deno_config::fs::DenoConfigFs for DenoConfigFsAdapter<'a> {
self
.0
.read_text_file_lossy_sync(path, None)
// todo(https://github.com/denoland/deno_config/pull/140): avoid clone
.map(|s| s.into_owned())
.map_err(|err| err.into_io_error())
}

Expand Down
2 changes: 2 additions & 0 deletions cli/cache/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ impl<'a> deno_cache_dir::DenoCacheEnv for DenoCacheEnvFsAdapter<'a> {
self
.0
.read_file_sync(path, None)
// todo(https://github.com/denoland/deno_cache_dir/pull/66): avoid clone
.map(|bytes| bytes.into_owned())
.map_err(|err| err.into_io_error())
}

Expand Down
26 changes: 16 additions & 10 deletions cli/module_loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,10 @@ impl<TGraphContainer: ModuleGraphContainer> NodeRequireLoader
self.npm_resolver.ensure_read_permission(permissions, path)
}

fn load_text_file_lossy(&self, path: &Path) -> Result<String, AnyError> {
fn load_text_file_lossy(
&self,
path: &Path,
) -> Result<Cow<'static, str>, AnyError> {
// todo(dsherret): use the preloaded module from the graph if available?
let media_type = MediaType::from_path(path);
let text = self.fs.read_text_file_lossy_sync(path, None)?;
Expand All @@ -1075,15 +1078,18 @@ impl<TGraphContainer: ModuleGraphContainer> NodeRequireLoader
.into(),
);
}
self.emitter.emit_parsed_source_sync(
&specifier,
media_type,
// this is probably not super accurate due to require esm, but probably ok.
// If we find this causes a lot of churn in the emit cache then we should
// investigate how we can make this better
ModuleKind::Cjs,
&text.into(),
)
self
.emitter
.emit_parsed_source_sync(
&specifier,
media_type,
// this is probably not super accurate due to require esm, but probably ok.
// If we find this causes a lot of churn in the emit cache then we should
// investigate how we can make this better
ModuleKind::Cjs,
&text.into(),
)
.map(Cow::Owned)
} else {
Ok(text)
}
Expand Down
2 changes: 1 addition & 1 deletion cli/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer {
if let Ok(source_from_file) =
self.fs.read_text_file_lossy_async(path, None).await
{
Cow::Owned(source_from_file)
source_from_file
} else {
return Ok(ExtNodeCjsAnalysis::Cjs(CjsAnalysisExports {
exports: vec![],
Expand Down
16 changes: 11 additions & 5 deletions cli/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use crate::node::CliNodeCodeTranslator;
use crate::npm::CliNpmResolver;
use crate::npm::InnerCliNpmResolverRef;
use crate::util::sync::AtomicFlag;
use crate::util::text_encoding::from_utf8_lossy_owned;
use crate::util::text_encoding::from_utf8_lossy_cow;

pub type CjsTracker = deno_resolver::cjs::CjsTracker<DenoFsNodeResolverEnv>;
pub type IsCjsResolver =
Expand All @@ -62,7 +62,10 @@ pub struct ModuleCodeStringSource {
pub struct CliDenoResolverFs(pub Arc<dyn FileSystem>);

impl deno_resolver::fs::DenoResolverFs for CliDenoResolverFs {
fn read_to_string_lossy(&self, path: &Path) -> std::io::Result<String> {
fn read_to_string_lossy(
&self,
path: &Path,
) -> std::io::Result<Cow<'static, str>> {
self
.0
.read_text_file_lossy_sync(path, None)
Expand Down Expand Up @@ -182,18 +185,21 @@ impl NpmModuleLoader {

let code = if self.cjs_tracker.is_maybe_cjs(specifier, media_type)? {
// translate cjs to esm if it's cjs and inject node globals
let code = from_utf8_lossy_owned(code);
let code = from_utf8_lossy_cow(code);
ModuleSourceCode::String(
self
.node_code_translator
.translate_cjs_to_esm(specifier, Some(Cow::Owned(code)))
.translate_cjs_to_esm(specifier, Some(code))
.await?
.into_owned()
.into(),
)
} else {
// esm and json code is untouched
ModuleSourceCode::Bytes(code.into_boxed_slice().into())
ModuleSourceCode::Bytes(match code {
Cow::Owned(bytes) => bytes.into_boxed_slice().into(),
Cow::Borrowed(bytes) => bytes.into(),
})
};

Ok(ModuleCodeStringSource {
Expand Down
7 changes: 3 additions & 4 deletions cli/standalone/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,13 @@ impl StandaloneModules {
.vfs
.read_file_all(entry, VfsFileSubDataKind::ModuleGraph)?,
Err(err) if err.kind() == ErrorKind::NotFound => {
let bytes = match RealFs.read_file_sync(&path, None) {
match RealFs.read_file_sync(&path, None) {
Ok(bytes) => bytes,
Err(FsError::Io(err)) if err.kind() == ErrorKind::NotFound => {
return Ok(None)
}
Err(err) => return Err(err.into()),
};
Cow::Owned(bytes)
}
}
Err(err) => return Err(err.into()),
};
Expand Down Expand Up @@ -694,7 +693,7 @@ impl<'a> DenoCompileBinaryWriter<'a> {
&file_path,
match maybe_source {
Some(source) => source,
None => RealFs.read_file_sync(&file_path, None)?,
None => RealFs.read_file_sync(&file_path, None)?.into_owned(),
},
VfsFileSubDataKind::ModuleGraph,
)
Expand Down
5 changes: 3 additions & 2 deletions cli/standalone/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ use crate::resolver::CliNpmReqResolver;
use crate::resolver::NpmModuleLoader;
use crate::util::progress_bar::ProgressBar;
use crate::util::progress_bar::ProgressBarStyle;
use crate::util::text_encoding::from_utf8_lossy_cow;
use crate::util::v8::construct_v8_flags;
use crate::worker::CliCodeCache;
use crate::worker::CliMainWorkerFactory;
Expand Down Expand Up @@ -516,13 +517,13 @@ impl NodeRequireLoader for EmbeddedModuleLoader {
fn load_text_file_lossy(
&self,
path: &std::path::Path,
) -> Result<String, AnyError> {
) -> Result<Cow<'static, str>, AnyError> {
let file_entry = self.shared.vfs.file_entry(path)?;
let file_bytes = self
.shared
.vfs
.read_file_all(file_entry, VfsFileSubDataKind::ModuleGraph)?;
Ok(String::from_utf8(file_bytes.into_owned())?)
Ok(from_utf8_lossy_cow(file_bytes))
}

fn is_maybe_cjs(
Expand Down
11 changes: 4 additions & 7 deletions cli/standalone/virtual_fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -743,15 +743,12 @@ impl deno_io::fs::File for FileBackedVfsFile {
Err(FsError::NotSupported)
}

fn read_all_sync(self: Rc<Self>) -> FsResult<Vec<u8>> {
self.read_to_end().map(|bytes| bytes.into_owned())
fn read_all_sync(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
self.read_to_end()
}
async fn read_all_async(self: Rc<Self>) -> FsResult<Vec<u8>> {
async fn read_all_async(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
let inner = (*self).clone();
tokio::task::spawn_blocking(move || {
inner.read_to_end().map(|bytes| bytes.into_owned())
})
.await?
tokio::task::spawn_blocking(move || inner.read_to_end()).await?
}

fn chmod_sync(self: Rc<Self>, _pathmode: u32) -> FsResult<()> {
Expand Down
9 changes: 9 additions & 0 deletions cli/util/text_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ use deno_core::ModuleSourceCode;
static SOURCE_MAP_PREFIX: &[u8] =
b"//# sourceMappingURL=data:application/json;base64,";

#[inline(always)]
pub fn from_utf8_lossy_cow(bytes: Cow<[u8]>) -> Cow<str> {
match bytes {
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
Cow::Owned(bytes) => Cow::Owned(from_utf8_lossy_owned(bytes)),
}
}

#[inline(always)]
pub fn from_utf8_lossy_owned(bytes: Vec<u8>) -> String {
match String::from_utf8_lossy(&bytes) {
Cow::Owned(code) => code,
Expand Down
7 changes: 4 additions & 3 deletions ext/fs/in_memory_fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Allow using Arc for this module.
#![allow(clippy::disallowed_types)]

use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::io::Error;
Expand Down Expand Up @@ -457,11 +458,11 @@ impl FileSystem for InMemoryFs {
&self,
path: &Path,
_access_check: Option<AccessCheckCb>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let entry = self.get_entry(path);
match entry {
Some(entry) => match &*entry {
PathEntry::File(data) => Ok(data.clone()),
PathEntry::File(data) => Ok(Cow::Owned(data.clone())),
PathEntry::Dir => Err(FsError::Io(Error::new(
ErrorKind::InvalidInput,
"Is a directory",
Expand All @@ -474,7 +475,7 @@ impl FileSystem for InMemoryFs {
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
self.read_file_sync(&path, access_check)
}
}
21 changes: 15 additions & 6 deletions ext/fs/interface.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use core::str;
use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
Expand Down Expand Up @@ -288,7 +289,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let options = OpenOptions::read();
let file = self.open_sync(path, options, access_check)?;
let buf = file.read_all_sync()?;
Expand All @@ -298,7 +299,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let options = OpenOptions::read();
let file = self.open_async(path, options, access_check).await?;
let buf = file.read_all_async().await?;
Expand Down Expand Up @@ -327,17 +328,25 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<String> {
) -> FsResult<Cow<'static, str>> {
let buf = self.read_file_sync(path, access_check)?;
Ok(string_from_utf8_lossy(buf))
Ok(string_from_cow_utf8_lossy(buf))
}
async fn read_text_file_lossy_async<'a>(
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<String> {
) -> FsResult<Cow<'static, str>> {
let buf = self.read_file_async(path, access_check).await?;
Ok(string_from_utf8_lossy(buf))
Ok(string_from_cow_utf8_lossy(buf))
}
}

#[inline(always)]
fn string_from_cow_utf8_lossy(buf: Cow<'static, [u8]>) -> Cow<'static, str> {
match buf {
Cow::Owned(buf) => Cow::Owned(string_from_utf8_lossy(buf)),
Cow::Borrowed(buf) => String::from_utf8_lossy(buf),
}
}

Expand Down
1 change: 1 addition & 0 deletions ext/fs/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub use crate::interface::OpenOptions;
pub use crate::ops::FsOpsError;
pub use crate::ops::FsOpsErrorKind;
pub use crate::ops::OperationError;
pub use crate::ops::V8MaybeStaticStr;
pub use crate::std_fs::RealFs;
pub use crate::sync::MaybeSend;
pub use crate::sync::MaybeSync;
Expand Down
Loading

0 comments on commit fdba58c

Please sign in to comment.