Skip to content

Commit

Permalink
Auto merge of #107723 - Kobzol:bootstrap-bolt, r=Mark-Simulacrum
Browse files Browse the repository at this point in the history
Apply BOLT optimizations without rebuilding LLVM

This PR adds an explicit BOLT bootstrap step which applies BOLT on the fly when LLVM artifacts are copied to a sysroot (it only does this once per bootstrap invocation; the result is cached). This avoids one LLVM rebuild in the Linux CI dist build.

r? `@jyn514`
  • Loading branch information
bors committed Mar 5, 2023
2 parents a512c6c + 9aad2ad commit 35636f9
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 43 deletions.
27 changes: 9 additions & 18 deletions src/bootstrap/bolt.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,40 @@
use std::path::Path;
use std::process::Command;

/// Uses the `llvm-bolt` binary to instrument the binary/library at the given `path` with BOLT.
/// Uses the `llvm-bolt` binary to instrument the artifact at the given `path` with BOLT.
/// When the instrumented artifact is executed, it will generate BOLT profiles into
/// `/tmp/prof.fdata.<pid>.fdata`.
pub fn instrument_with_bolt_inplace(path: &Path) {
let dir = std::env::temp_dir();
let instrumented_path = dir.join("instrumented.so");

/// Creates the instrumented artifact at `output_path`.
pub fn instrument_with_bolt(path: &Path, output_path: &Path) {
let status = Command::new("llvm-bolt")
.arg("-instrument")
.arg(&path)
// Make sure that each process will write its profiles into a separate file
.arg("--instrumentation-file-append-pid")
.arg("-o")
.arg(&instrumented_path)
.arg(output_path)
.status()
.expect("Could not instrument artifact using BOLT");

if !status.success() {
panic!("Could not instrument {} with BOLT, exit code {:?}", path.display(), status.code());
}

std::fs::copy(&instrumented_path, path).expect("Cannot copy instrumented artifact");
std::fs::remove_file(instrumented_path).expect("Cannot delete instrumented artifact");
}

/// Uses the `llvm-bolt` binary to optimize the binary/library at the given `path` with BOLT,
/// Uses the `llvm-bolt` binary to optimize the artifact at the given `path` with BOLT,
/// using merged profiles from `profile_path`.
///
/// The recorded profiles have to be merged using the `merge-fdata` tool from LLVM and the merged
/// profile path should be then passed to this function.
pub fn optimize_library_with_bolt_inplace(path: &Path, profile_path: &Path) {
let dir = std::env::temp_dir();
let optimized_path = dir.join("optimized.so");

///
/// Creates the optimized artifact at `output_path`.
pub fn optimize_with_bolt(path: &Path, profile_path: &Path, output_path: &Path) {
let status = Command::new("llvm-bolt")
.arg(&path)
.arg("-data")
.arg(&profile_path)
.arg("-o")
.arg(&optimized_path)
.arg(output_path)
// Reorder basic blocks within functions
.arg("-reorder-blocks=ext-tsp")
// Reorder functions within the binary
Expand All @@ -65,7 +59,4 @@ pub fn optimize_library_with_bolt_inplace(path: &Path, profile_path: &Path) {
if !status.success() {
panic!("Could not optimize {} with BOLT, exit code {:?}", path.display(), status.code());
}

std::fs::copy(&optimized_path, path).expect("Cannot copy optimized artifact");
std::fs::remove_file(optimized_path).expect("Cannot delete optimized artifact");
}
135 changes: 134 additions & 1 deletion src/bootstrap/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ use std::process::Command;

use object::read::archive::ArchiveFile;
use object::BinaryFormat;
use sha2::Digest;

use crate::bolt::{instrument_with_bolt, optimize_with_bolt};
use crate::builder::{Builder, Kind, RunConfig, ShouldRun, Step};
use crate::cache::{Interned, INTERNER};
use crate::channel;
Expand Down Expand Up @@ -1904,6 +1906,26 @@ fn add_env(builder: &Builder<'_>, cmd: &mut Command, target: TargetSelection) {
}
}

/// Installs the LLVM artifact `source` into the `destination` directory, post-processing it
/// with BOLT first when the build configuration asks for it.
///
/// Does nothing in dry-run mode.
fn install_llvm_file(builder: &Builder<'_>, source: &Path, destination: &Path) {
    if builder.config.dry_run() {
        return;
    }

    // The built LLVM files are never modified in place, so that they are not "tainted" with
    // BOLT. Instead, an instrumented/optimized copy is produced on the fly by a dedicated step
    // right before the file is installed into its destination directory.
    let config = &builder.config;
    let artifact = if config.llvm_bolt_profile_generate {
        builder.ensure(BoltInstrument::new(source.to_path_buf()))
    } else {
        match &config.llvm_bolt_profile_use {
            Some(profile) => {
                builder.ensure(BoltOptimize::new(source.to_path_buf(), profile.into()))
            }
            // BOLT is not in use: install the file exactly as it was built.
            None => source.to_path_buf(),
        }
    };

    builder.install(&artifact, destination, 0o644);
}

/// Maybe add LLVM object files to the given destination lib-dir. Allows either static or dynamic linking.
///
/// Returns whether the files were actually copied.
Expand Down Expand Up @@ -1955,7 +1977,7 @@ fn maybe_install_llvm(builder: &Builder<'_>, target: TargetSelection, dst_libdir
} else {
PathBuf::from(file)
};
builder.install(&file, dst_libdir, 0o644);
install_llvm_file(builder, &file, dst_libdir);
}
!builder.config.dry_run()
} else {
Expand Down Expand Up @@ -1986,6 +2008,117 @@ pub fn maybe_install_llvm_runtime(builder: &Builder<'_>, target: TargetSelection
}
}

/// Computes the path at which the BOLT-manipulated version of `file` should be stored, creating
/// its parent directory if needed.
///
/// The artifact lives in `<out>/bolt/<hash>/`, so BOLT artifacts of different files that happen
/// to share a file name are never mixed up.
///
/// The file name itself is kept unchanged, so that copying the manipulated file into a
/// directory produces the same final file path as copying the original would.
fn create_bolt_output_path(builder: &Builder<'_>, file: &Path, hash: &str) -> PathBuf {
    let mut output = builder.out.join("bolt");
    output.push(hash);
    t!(fs::create_dir_all(&output));

    output.push(file.file_name().unwrap());
    output
}

/// Step that instruments the provided file with BOLT.
/// Its output is the path to the instrumented artifact.
///
/// `hash` holds the hex-encoded SHA-256 digest of the file contents, computed when the step is
/// constructed.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct BoltInstrument {
    file: PathBuf,
    hash: String,
}

impl BoltInstrument {
    /// Creates the step for `file`, reading the whole file to compute its content hash.
    fn new(file: PathBuf) -> Self {
        let contents = t!(fs::read(&file));

        let mut hasher = sha2::Sha256::new();
        hasher.update(contents);
        let digest = hasher.finalize();

        Self { hash: hex::encode(digest.as_slice()), file }
    }
}

impl Step for BoltInstrument {
type Output = PathBuf;

const ONLY_HOSTS: bool = false;
const DEFAULT: bool = false;

fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
run.never()
}

fn run(self, builder: &Builder<'_>) -> PathBuf {
if builder.build.config.dry_run() {
return self.file.clone();
}

if builder.build.config.llvm_from_ci {
println!("warning: trying to use BOLT with LLVM from CI, this will probably not work");
}

println!("Instrumenting {} with BOLT", self.file.display());

let output_path = create_bolt_output_path(builder, &self.file, &self.hash);
if !output_path.is_file() {
instrument_with_bolt(&self.file, &output_path);
}
output_path
}
}

/// Step that optimizes the provided file with BOLT, using the given merged profile.
/// Its output is the path to the optimized artifact.
///
/// `hash` holds the hex-encoded SHA-256 digest of the file contents followed by the profile
/// contents. It is stored in the step to make sure that we don't optimize the same file
/// twice (even under different file paths).
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct BoltOptimize {
    file: PathBuf,
    profile: PathBuf,
    hash: String,
}

impl BoltOptimize {
    /// Creates the step for `file` and `profile`, reading both files in full to compute the
    /// combined content hash.
    fn new(file: PathBuf, profile: PathBuf) -> Self {
        let mut hasher = sha2::Sha256::new();
        // The artifact is hashed first and the profile second.
        for input in [&file, &profile] {
            hasher.update(t!(fs::read(input)));
        }
        let digest = hasher.finalize();

        Self { hash: hex::encode(digest.as_slice()), file, profile }
    }
}

impl Step for BoltOptimize {
type Output = PathBuf;

const ONLY_HOSTS: bool = false;
const DEFAULT: bool = false;

fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
run.never()
}

fn run(self, builder: &Builder<'_>) -> PathBuf {
if builder.build.config.dry_run() {
return self.file.clone();
}

if builder.build.config.llvm_from_ci {
println!("warning: trying to use BOLT with LLVM from CI, this will probably not work");
}

println!("Optimizing {} with BOLT", self.file.display());

let output_path = create_bolt_output_path(builder, &self.file, &self.hash);
if !output_path.is_file() {
optimize_with_bolt(&self.file, &self.profile, &output_path);
}
output_path
}
}

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct LlvmTools {
pub target: TargetSelection,
Expand Down
23 changes: 0 additions & 23 deletions src/bootstrap/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use std::io;
use std::path::{Path, PathBuf};
use std::process::Command;

use crate::bolt::{instrument_with_bolt_inplace, optimize_library_with_bolt_inplace};
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
use crate::channel;
use crate::config::{Config, TargetSelection};
Expand Down Expand Up @@ -523,34 +522,12 @@ impl Step for Llvm {
}
}

// After LLVM is built, we modify (instrument or optimize) the libLLVM.so library file
// in place. This is fine, because currently we do not support incrementally rebuilding
// LLVM after a configuration change, so to rebuild it the build files have to be removed,
// which will also remove these modified files.
if builder.config.llvm_bolt_profile_generate {
instrument_with_bolt_inplace(&get_built_llvm_lib_path(&res.llvm_config));
}
if let Some(path) = &builder.config.llvm_bolt_profile_use {
optimize_library_with_bolt_inplace(
&get_built_llvm_lib_path(&res.llvm_config),
&Path::new(path),
);
}

t!(stamp.write());

res
}
}

/// Returns path to a built LLVM library (libLLVM.so).
/// Assumes that we have built LLVM into a single library file.
fn get_built_llvm_lib_path(llvm_config_path: &Path) -> PathBuf {
let mut cmd = Command::new(llvm_config_path);
cmd.arg("--libfiles");
PathBuf::from(output(&mut cmd).trim())
}

fn check_llvm_version(builder: &Builder<'_>, llvm_config: &Path) {
if !builder.config.llvm_version_check {
return;
Expand Down
4 changes: 3 additions & 1 deletion src/ci/stage-build.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,14 +798,16 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
"--llvm-profile-use",
pipeline.llvm_profile_merged_file(),
"--llvm-bolt-profile-generate",
"--rust-profile-use",
pipeline.rustc_profile_merged_file()
])
record_metrics(pipeline, rustc_build)

with stage3.section("Gather profiles"):
gather_llvm_bolt_profiles(pipeline)

# LLVM is not being cleared here, we want to reuse the previous build
print_free_disk_space(pipeline)
clear_llvm_files(pipeline)
final_build_args += [
"--llvm-bolt-profile-use",
pipeline.llvm_bolt_profile_merged_file()
Expand Down

0 comments on commit 35636f9

Please sign in to comment.