Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PVF: Fix unshare "no such file or directory" error #2426

Merged
merged 6 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions polkadot/node/core/pvf/src/artifacts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -499,8 +499,7 @@ mod tests {

#[tokio::test]
async fn remove_stale_cache_on_startup() {
let cache_dir = crate::worker_intf::tmppath("test-cache").await.unwrap();
fs::create_dir_all(&cache_dir).unwrap();
let cache_dir = tempfile::Builder::new().prefix("test-cache-").tempdir().unwrap();

// invalid prefix
create_rand_artifact(&cache_dir, "");
Expand Down Expand Up @@ -529,7 +528,7 @@ mod tests {

assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 7);

let artifacts = Artifacts::new_and_prune(&cache_dir).await;
let artifacts = Artifacts::new_and_prune(cache_dir.path()).await;

assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1);
assert_eq!(artifacts.len(), 1);
Expand Down
4 changes: 2 additions & 2 deletions polkadot/node/core/pvf/src/execute/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ where
// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
// worker cache, and the child can't access any other artifacts or gain any information from the
// original filename.
let link_path = worker_dir::execute_artifact(&worker_dir.path);
let link_path = worker_dir::execute_artifact(worker_dir.path());
if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
gum::warn!(
target: LOG_TARGET,
Expand All @@ -292,7 +292,7 @@ where
}
}

let worker_dir_path = worker_dir.path.clone();
let worker_dir_path = worker_dir.path().to_owned();
let outcome = f(worker_dir).await;

// Try to clear the worker dir.
Expand Down
4 changes: 2 additions & 2 deletions polkadot/node/core/pvf/src/prepare/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ where
{
// Create the tmp file here so that the child doesn't need any file creation rights. This will
// be cleared at the end of this function.
let tmp_file = worker_dir::prepare_tmp_artifact(&worker_dir.path);
let tmp_file = worker_dir::prepare_tmp_artifact(worker_dir.path());
if let Err(err) = tokio::fs::File::create(&tmp_file).await {
gum::warn!(
target: LOG_TARGET,
Expand All @@ -333,7 +333,7 @@ where
}
};

let worker_dir_path = worker_dir.path.clone();
let worker_dir_path = worker_dir.path().to_owned();
let outcome = f(tmp_file, stream, worker_dir).await;

// Try to clear the worker dir.
Expand Down
17 changes: 7 additions & 10 deletions polkadot/node/core/pvf/src/security.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,15 @@ async fn check_can_unshare_user_namespace_and_change_root(
) -> SecureModeResult {
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
let cache_dir_tempdir =
crate::worker_intf::tmppath_in("check-can-unshare", cache_path)
.await
.map_err(
|err|
SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
format!("could not create a temporary directory in {:?}: {}", cache_path, err)
)
)?;
let cache_dir_tempdir = tempfile::Builder::new()
.prefix("check-can-unshare-")
.tempdir_in(cache_path)
.map_err(|err| SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
format!("could not create a temporary directory in {:?}: {}", cache_path, err)
))?;
match tokio::process::Command::new(prepare_worker_program_path)
.arg("--check-can-unshare-user-namespace-and-change-root")
.arg(cache_dir_tempdir)
.arg(cache_dir_tempdir.path())
.output()
.await
{
Expand Down
105 changes: 47 additions & 58 deletions polkadot/node/core/pvf/src/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pub async fn spawn_with_program_path(

with_transient_socket_path(debug_id, |socket_path| {
let socket_path = socket_path.to_owned();
let worker_dir_path = worker_dir.path().to_owned();

async move {
let listener = UnixListener::bind(&socket_path).map_err(|err| {
Expand All @@ -91,7 +92,7 @@ pub async fn spawn_with_program_path(
&program_path,
&extra_args,
&socket_path,
&worker_dir.path,
&worker_dir_path,
security_status,
)
.map_err(|err| {
Expand All @@ -100,15 +101,14 @@ pub async fn spawn_with_program_path(
%debug_id,
?program_path,
?extra_args,
?worker_dir.path,
?worker_dir_path,
?socket_path,
"cannot spawn a worker: {:?}",
err,
);
SpawnErr::ProcessSpawn
})?;

let worker_dir_path = worker_dir.path.clone();
futures::select! {
accept_result = listener.accept().fuse() => {
let (stream, _) = accept_result.map_err(|err| {
Expand Down Expand Up @@ -150,7 +150,42 @@ where
F: FnOnce(&Path) -> Fut,
Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
{
let socket_path = tmppath(&format!("pvf-host-{}", debug_id))
/// Returns a path under [`std::env::temp_dir`]. The path name will start with the given prefix.
///
/// There is only a certain number of retries. If exceeded this function will give up and return
/// an error.
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
mrcnski marked this conversation as resolved.
Show resolved Hide resolved
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
use rand::distributions::Alphanumeric;

const DESCRIMINATOR_LEN: usize = 10;

let mut buf = Vec::with_capacity(prefix.len() + DESCRIMINATOR_LEN);
buf.extend(prefix.as_bytes());
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DESCRIMINATOR_LEN));

let s = std::str::from_utf8(&buf)
.expect("the string is collected from a valid utf-8 sequence; qed");

let mut path = dir.to_owned();
path.push(s);
path
}

const NUM_RETRIES: usize = 50;

let dir = std::env::temp_dir();
for _ in 0..NUM_RETRIES {
let tmp_path = make_tmppath(prefix, &dir);
if !tmp_path.exists() {
return Ok(tmp_path)
}
}

Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
}

let socket_path = tmppath(&format!("pvf-host-{}-", debug_id))
.await
.map_err(|_| SpawnErr::TmpPath)?;
let result = f(&socket_path).await;
Expand All @@ -162,46 +197,6 @@ where
result
}

/// Returns a path under the given `dir`. The path name will start with the given prefix.
///
/// There is only a certain number of retries. If exceeded this function will give up and return an
/// error.
pub async fn tmppath_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
use rand::distributions::Alphanumeric;

const DESCRIMINATOR_LEN: usize = 10;

let mut buf = Vec::with_capacity(prefix.len() + DESCRIMINATOR_LEN);
buf.extend(prefix.as_bytes());
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DESCRIMINATOR_LEN));

let s = std::str::from_utf8(&buf)
.expect("the string is collected from a valid utf-8 sequence; qed");

let mut path = dir.to_owned();
path.push(s);
path
}

const NUM_RETRIES: usize = 50;

for _ in 0..NUM_RETRIES {
let tmp_path = make_tmppath(prefix, dir);
if !tmp_path.exists() {
return Ok(tmp_path)
}
}

Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
}

/// The same as [`tmppath_in`], but uses [`std::env::temp_dir`] as the directory.
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
let temp_dir = std::env::temp_dir();
tmppath_in(prefix, &temp_dir).await
}

/// A struct that represents an idle worker.
///
/// This struct is supposed to be used as a token that is passed by move into a subroutine that
Expand All @@ -224,8 +219,6 @@ pub struct IdleWorker {
pub enum SpawnErr {
/// Cannot obtain a temporary path location.
TmpPath,
/// An FS error occurred.
Fs(String),
/// Cannot bind the socket to the given path.
Bind,
/// An error happened during accepting a connection to the socket.
Expand Down Expand Up @@ -419,26 +412,22 @@ pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>
/// ```
#[derive(Debug)]
pub struct WorkerDir {
pub path: PathBuf,
tempdir: tempfile::TempDir,
}

impl WorkerDir {
/// Creates a new, empty worker dir with a random name in the given cache dir.
pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
let prefix = format!("worker-dir-{}-", debug_id);
let path = tmppath_in(&prefix, cache_dir).await.map_err(|_| SpawnErr::TmpPath)?;
tokio::fs::create_dir(&path)
.await
.map_err(|err| SpawnErr::Fs(err.to_string()))?;
Ok(Self { path })
let tempdir = tempfile::Builder::new()
.prefix(&prefix)
.tempdir_in(cache_dir)
.map_err(|_| SpawnErr::TmpPath)?;
Ok(Self { tempdir })
}
}

// Try to clean up the temporary worker dir at the end of the worker's lifetime. It should be wiped
// on startup, but we make a best effort not to leave it around.
impl Drop for WorkerDir {
fn drop(&mut self) {
let _ = std::fs::remove_dir_all(&self.path);
pub fn path(&self) -> &Path {
self.tempdir.path()
}
}

Expand Down