Skip to content

Commit

Permalink
feat!: replace FileSystem::canonicalize with FileSystem::read_link
Browse files Browse the repository at this point in the history
  • Loading branch information
Boshen committed Dec 11, 2024
1 parent 8844add commit aa12e6b
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 109 deletions.
76 changes: 55 additions & 21 deletions src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,28 @@ use std::{
io,
ops::Deref,
path::{Component, Path, PathBuf},
sync::Arc,
sync::{
atomic::{AtomicU64, Ordering},
Arc,
},
};

use dashmap::{DashMap, DashSet};
use once_cell::sync::OnceCell as OnceLock;
use rustc_hash::FxHasher;

use crate::{
context::ResolveContext as Ctx, package_json::PackageJson, FileMetadata, FileSystem,
ResolveError, ResolveOptions, TsConfig,
context::ResolveContext as Ctx, package_json::PackageJson, path::PathUtil, FileMetadata,
FileSystem, ResolveError, ResolveOptions, TsConfig,
};

static THREAD_COUNT: AtomicU64 = AtomicU64::new(1);

thread_local! {
/// Per-thread pre-allocated path that is used to perform operations on paths more quickly.
/// Learned from parcel <https://github.com/parcel-bundler/parcel/blob/a53f8f3ba1025c7ea8653e9719e0a61ef9717079/crates/parcel-resolver/src/cache.rs#L394>
pub static SCRATCH_PATH: UnsafeCell<PathBuf> = UnsafeCell::new(PathBuf::with_capacity(256));
pub static THREAD_ID: u64 = THREAD_COUNT.fetch_add(1, Ordering::SeqCst);
}

#[derive(Default)]
Expand Down Expand Up @@ -113,7 +119,8 @@ pub struct CachedPathImpl {
path: Box<Path>,
parent: Option<CachedPath>,
meta: OnceLock<Option<FileMetadata>>,
canonicalized: OnceLock<Option<CachedPath>>,
canonicalized: OnceLock<Result<CachedPath, ResolveError>>,
canonicalizing: AtomicU64,
node_modules: OnceLock<Option<CachedPath>>,
package_json: OnceLock<Option<(CachedPath, Arc<PackageJson>)>>,
}
Expand All @@ -126,6 +133,7 @@ impl CachedPathImpl {
parent,
meta: OnceLock::new(),
canonicalized: OnceLock::new(),
canonicalizing: AtomicU64::new(0),
node_modules: OnceLock::new(),
package_json: OnceLock::new(),
}
Expand Down Expand Up @@ -208,23 +216,49 @@ impl CachedPath {
)
}

pub fn realpath<Fs: FileSystem>(&self, cache: &Cache<Fs>) -> io::Result<Self> {
self.canonicalized
.get_or_try_init(|| {
if cache.fs.symlink_metadata(&self.path).is_ok_and(|m| m.is_symlink) {
let canonicalized = cache.fs.canonicalize(&self.path)?;
return Ok(Some(cache.value(&canonicalized)));
}
if let Some(parent) = self.parent() {
let parent_path = parent.realpath(cache)?;
let normalized = parent_path
.normalize_with(self.path.strip_prefix(&parent.path).unwrap(), cache);
return Ok(Some(normalized));
};
Ok(None)
/// Returns the canonical path, resolving all symbolic links.
///
/// <https://github.com/parcel-bundler/parcel/blob/4d27ec8b8bd1792f536811fef86e74a31fa0e704/crates/parcel-resolver/src/cache.rs#L232>
pub fn canonicalize<Fs: FileSystem>(&self, cache: &Cache<Fs>) -> Result<Self, ResolveError> {
// Check if this thread is already canonicalizing. If so, we have found a circular symlink.
// If a different thread is canonicalizing, OnceLock will queue this thread to wait for the result.
let tid = THREAD_ID.with(|t| *t);
if self.0.canonicalizing.load(Ordering::Acquire) == tid {
return Err(io::Error::new(io::ErrorKind::NotFound, "Circular symlink").into());

Check warning on line 227 in src/cache.rs

View check run for this annotation

Codecov / codecov/patch

src/cache.rs#L227

Added line #L227 was not covered by tests
}

self.0
.canonicalized
.get_or_init(|| {
self.0.canonicalizing.store(tid, Ordering::Release);

let res = self.parent().map_or_else(
|| Ok(self.clone()),
|parent| {
parent.canonicalize(cache).and_then(|parent_canonical| {
let path = parent_canonical.normalize_with(
self.path().strip_prefix(parent.path()).unwrap(),
cache,
);
if cache.fs.symlink_metadata(self.path()).is_ok_and(|m| m.is_symlink) {
let link = cache.fs.read_link(path.path())?;
if link.is_absolute() {
return cache.value(&link.normalize()).canonicalize(cache);
} else if let Some(dir) = path.parent() {
// Symlink is relative `../../foo.js`, use the path directory
// to resolve this symlink.
return dir.normalize_with(&link, cache).canonicalize(cache);
}

Check warning on line 251 in src/cache.rs

View check run for this annotation

Codecov / codecov/patch

src/cache.rs#L251

Added line #L251 was not covered by tests
}
Ok(path)
})
},
);

self.0.canonicalizing.store(0, Ordering::Release);
res
})
.cloned()
.map(|r| r.unwrap_or_else(|| self.clone()))
.clone()
}

pub fn module_directory<Fs: FileSystem>(
Expand Down Expand Up @@ -265,7 +299,7 @@ impl CachedPath {
return Ok(None);
};
let real_path = if options.symlinks {
self.realpath(cache)?.path().join("package.json")
self.canonicalize(cache)?.path().join("package.json")
} else {
package_json_path.clone()
};
Expand Down
93 changes: 7 additions & 86 deletions src/file_system.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,12 @@ pub trait FileSystem: Send + Sync {
/// napi env.
fn symlink_metadata(&self, path: &Path) -> io::Result<FileMetadata>;

/// See [std::fs::canonicalize]
/// Returns the resolution of a symbolic link.
///
/// # Errors
///
/// See [std::fs::read_link]
/// ## Warning
/// Use `&Path` instead of a generic `P: AsRef<Path>` here
/// because of object-safety requirements. This is especially useful when
/// you want to store multiple `dyn FileSystem` in a `Vec` or use a `ResolverGeneric<Fs>` in
/// a napi environment.
fn canonicalize(&self, path: &Path) -> io::Result<PathBuf>;
fn read_link(&self, path: &Path) -> io::Result<PathBuf>;
}

/// Metadata information about a file
Expand Down Expand Up @@ -165,16 +160,16 @@ impl FileSystem for FileSystemOs {
fs::symlink_metadata(path).map(FileMetadata::from)
}

fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
fn read_link(&self, path: &Path) -> io::Result<PathBuf> {
cfg_if! {
if #[cfg(feature = "yarn_pnp")] {
match VPath::from(path)? {
VPath::Zip(info) => fast_canonicalize(info.physical_base_path().join(info.zip_path)),
VPath::Virtual(info) => fast_canonicalize(info.physical_base_path()),
VPath::Native(path) => fast_canonicalize(path),
VPath::Zip(info) => fs::read_link(info.physical_base_path().join(info.zip_path)),
VPath::Virtual(info) => fs::read_link(info.physical_base_path()),
VPath::Native(path) => fs::read_link(path),
}
} else {
fast_canonicalize(path)
fs::read_link(path)
}
}
}
Expand All @@ -189,77 +184,3 @@ fn metadata() {
);
let _ = meta;
}

/// Canonicalizes `path`, picking the strategy at compile time per platform.
///
/// * On Windows the path is resolved with [`fs::canonicalize`] and the result
///   is post-processed by `node_compatible_raw_canonicalize`.
/// * On every other target an owned copy of the path is handed to
///   `fast_canonicalize_non_windows`.
///
/// # Errors
///
/// Returns whatever I/O error the selected platform implementation reports.
#[inline]
fn fast_canonicalize<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
    // fs::canonicalize was faster on Windows (https://github.com/oxc-project/oxc-resolver/pull/306)
    #[cfg(windows)]
    let canonical = {
        let raw = fs::canonicalize(path)?;
        Ok(node_compatible_raw_canonicalize(raw))
    };

    #[cfg(not(windows))]
    let canonical = fast_canonicalize_non_windows(path.as_ref().to_path_buf());

    canonical
}

#[inline]
#[cfg(not(windows))]
// A faster `fs::canonicalize` implementation for non-Windows targets, intended to
// reduce the number of syscalls.
//
// `path` is expected to be a symbolic link: the first step of every loop
// iteration is `fs::read_link`, which returns an error when the path is not a
// symlink or does not exist.
//
// Each iteration:
// 1. reads the link target of the current path,
// 2. drops the link's own file name so `path_buf` is the containing directory
//    (canonicalizing that directory first if it is itself a symlink),
// 3. applies the link target to that directory component by component,
//    re-canonicalizing whenever the intermediate path turns out to be a symlink.
// The loop ends once the resulting path is no longer a symlink.
//
// Errors: any `io::Error` from `fs::read_link`, `fs::symlink_metadata`, or the
// recursive `fast_canonicalize` calls is propagated to the caller.
//
// NOTE(review): there is no visible guard against symlink cycles here; a
// circular link looks like it would loop/recurse without bound — confirm.
fn fast_canonicalize_non_windows(path: PathBuf) -> io::Result<PathBuf> {
use std::path::Component;
let mut path_buf = path;

loop {
// Target stored in the symlink; may be relative or absolute.
let link = fs::read_link(&path_buf)?;
// Remove the symlink's own name: the target is applied on top of its parent directory.
path_buf.pop();
// The parent directory may itself be a symlink; resolve it before applying the target.
if fs::symlink_metadata(&path_buf)?.is_symlink() {
path_buf = fast_canonicalize(path_buf)?;
}
for component in link.components() {
match component {
// `..` steps up one level from the path built so far.
Component::ParentDir => {
path_buf.pop();
}
Component::Normal(seg) => {
#[cfg(target_family = "wasm")]
{
// Need to trim the extra \0 introduced by https://github.com/nodejs/uvwasi/issues/262
path_buf.push(seg.to_string_lossy().trim_end_matches('\0'));
}
#[cfg(not(target_family = "wasm"))]
{
path_buf.push(seg);
}
}
// An absolute target discards everything accumulated so far and restarts at `/`.
Component::RootDir => {
path_buf = PathBuf::from("/");
}
// `.` and path prefixes leave the accumulated path unchanged.
Component::CurDir | Component::Prefix(_) => {}
}

// After every applied component, resolve the intermediate path if it is a symlink.
if fs::symlink_metadata(&path_buf)?.is_symlink() {
path_buf = fast_canonicalize(path_buf)?;
}
}
// Stop once the fully applied path is not a symlink; otherwise resolve it again.
if !fs::symlink_metadata(&path_buf)?.is_symlink() {
break;
}
}
Ok(path_buf)
}

/// Strips a leading `UNC_PATH_PREFIX` or, failing that, a leading
/// `LONG_PATH_PREFIX` from `path` and returns the remainder as an owned
/// [`PathBuf`].
///
/// When neither prefix is present the path is returned unchanged (as an owned
/// copy). The UNC prefix is tried first; the long-path prefix is only
/// considered when the UNC prefix does not match.
#[cfg(windows)]
fn node_compatible_raw_canonicalize<P: AsRef<Path>>(path: P) -> PathBuf {
    let original = path.as_ref();
    let encoded = original.as_os_str().as_encoded_bytes();

    let without_prefix = match encoded.strip_prefix(UNC_PATH_PREFIX) {
        Some(rest) => Some(rest),
        None => encoded.strip_prefix(LONG_PATH_PREFIX),
    };

    match without_prefix {
        Some(rest) => {
            // SAFETY: `rest` is a suffix of the bytes produced by `as_encoded_bytes`
            // for this very path.
            // NOTE(review): soundness also relies on the stripped prefix ending on a
            // valid encoded boundary (e.g. the prefix constants being ASCII) — confirm.
            unsafe { PathBuf::from(std::ffi::OsStr::from_encoded_bytes_unchecked(rest)) }
        }
        None => original.to_path_buf(),
    }
}
5 changes: 4 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,10 @@ impl<Fs: FileSystem> ResolverGeneric<Fs> {

fn load_realpath(&self, cached_path: &CachedPath) -> Result<PathBuf, ResolveError> {
if self.options.symlinks {
cached_path.realpath(&self.cache).map(|c| c.to_path_buf()).map_err(ResolveError::from)
cached_path
.canonicalize(&self.cache)
.map(|c| c.to_path_buf())
.map_err(ResolveError::from)
} else {
Ok(cached_path.to_path_buf())
}
Expand Down
2 changes: 1 addition & 1 deletion src/tests/memory_fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ impl FileSystem for MemoryFS {
self.metadata(path)
}

fn canonicalize(&self, _path: &Path) -> io::Result<PathBuf> {
fn read_link(&self, _path: &Path) -> io::Result<PathBuf> {
Err(io::Error::new(io::ErrorKind::NotFound, "not a symlink"))
}
}

0 comments on commit aa12e6b

Please sign in to comment.