Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make source tarball generation more reproducible #123246

Merged
merged 5 commits into from
Mar 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/bootstrap/src/core/build_steps/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -995,9 +995,9 @@ impl Step for PlainSourceTarball {
if builder.rust_info().is_managed_git_subrepository()
|| builder.rust_info().is_from_tarball()
{
if builder.rust_info().is_managed_git_subrepository() {
// Ensure we have the submodules checked out.
builder.update_submodule(Path::new("src/tools/cargo"));
// Ensure we have all submodules from src and other directories checked out.
for submodule in builder.get_all_submodules() {
builder.update_submodule(Path::new(submodule));
}

// Vendor all Cargo dependencies
Expand Down Expand Up @@ -1028,6 +1028,20 @@ impl Step for PlainSourceTarball {
builder.create(&cargo_config_dir.join("config.toml"), &config);
}

// Delete extraneous directories
Kobzol marked this conversation as resolved.
Show resolved Hide resolved
// FIXME: if we're managed by git, we should probably instead ask git if the given path
// is managed by it?
for entry in walkdir::WalkDir::new(tarball.image_dir())
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
{
if entry.path().is_dir() && entry.path().file_name() == Some(OsStr::new("__pycache__"))
{
t!(fs::remove_dir_all(entry.path()));
}
}

tarball.bare()
}
}
Expand Down
55 changes: 32 additions & 23 deletions src/bootstrap/src/core/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -554,29 +554,7 @@ impl<'a> ShouldRun<'a> {
///
/// [`path`]: ShouldRun::path
pub fn paths(mut self, paths: &[&str]) -> Self {
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();

let init_submodules_paths = |src: &PathBuf| {
let file = File::open(src.join(".gitmodules")).unwrap();

let mut submodules_paths = vec![];
for line in BufReader::new(file).lines() {
if let Ok(line) = line {
let line = line.trim();

if line.starts_with("path") {
let actual_path =
line.split(' ').last().expect("Couldn't get value of path");
submodules_paths.push(actual_path.to_owned());
}
}
}

submodules_paths
};

let submodules_paths =
SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.builder.src));
let submodules_paths = self.builder.get_all_submodules();

self.paths.insert(PathSet::Set(
paths
Expand Down Expand Up @@ -2151,6 +2129,37 @@ impl<'a> Builder<'a> {
out
}

/// Return paths of all submodules managed by git.
/// If the current checkout is not managed by git, returns an empty slice.
pub fn get_all_submodules(&self) -> &[String] {
if !self.rust_info().is_managed_git_subrepository() {
return &[];
}

static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();

let init_submodules_paths = |src: &PathBuf| {
let file = File::open(src.join(".gitmodules")).unwrap();

let mut submodules_paths = vec![];
for line in BufReader::new(file).lines() {
if let Ok(line) = line {
let line = line.trim();

if line.starts_with("path") {
let actual_path =
line.split(' ').last().expect("Couldn't get value of path");
submodules_paths.push(actual_path.to_owned());
}
}
}

submodules_paths
};

&SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.src))
}

/// Ensure that a given step is built *only if it's supposed to be built by default*, returning
/// its output. This will cache the step, so it's safe (and good!) to call this as often as
/// needed to ensure that all dependencies are build.
Expand Down
12 changes: 9 additions & 3 deletions src/tools/rust-installer/src/tarballer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use anyhow::{bail, Context, Result};
use std::fs::{read_link, symlink_metadata};
use std::io::{BufWriter, Write};
use std::path::Path;
use tar::{Builder, Header};
use tar::{Builder, Header, HeaderMode};
use walkdir::WalkDir;

use crate::{
Expand Down Expand Up @@ -53,14 +53,19 @@ impl Tarballer {
// Sort files by their suffix, to group files with the same name from
// different locations (likely identical) and files with the same
// extension (likely containing similar data).
let (dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
// Sorting of file and directory paths also helps with the reproducibility
// of the resulting archive.
let (mut dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
.context("failed to collect file paths")?;
dirs.sort();
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));

// Write the tar into both encoded files. We write all directories
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
let mut builder = Builder::new(buf);
// Make uid, gid and mtime deterministic to improve reproducibility
builder.mode(HeaderMode::Deterministic);

let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
pool.install(move || {
Expand Down Expand Up @@ -91,7 +96,8 @@ impl Tarballer {
fn append_path<W: Write>(builder: &mut Builder<W>, src: &Path, path: &String) -> Result<()> {
let stat = symlink_metadata(src)?;
let mut header = Header::new_gnu();
header.set_metadata(&stat);
header.set_metadata_in_mode(&stat, HeaderMode::Deterministic);

if stat.file_type().is_symlink() {
let link = read_link(src)?;
builder.append_link(&mut header, path, &link)?;
Expand Down
Loading