Skip to content

Commit

Permalink
Reapply: [WebAssembly] Implement build-id feature
Browse files Browse the repository at this point in the history
Implement the --build-id flag similarly to ELF, and generate a
build_id section according to the WebAssembly tool convention
specified in WebAssembly/tool-conventions#183

The default style ("fast" aka "tree") hashes the contents of the
output and (unlike ELF) generates a v5 UUID based on the hash (using a
hardcoded, randomly generated namespace UUID). It also supports generating
a random v4 UUID, a sha1 hash, and a user-specified string (as ELF does).

Differential Revision: https://reviews.llvm.org/D107662

Fix the MSVC build by calling std::copy on the underlying buffer rather
than copying directly from std::array to llvm::MutableArrayRef
  • Loading branch information
dschuff committed Mar 4, 2023
1 parent 1a1dc44 commit c7af9ae
Show file tree
Hide file tree
Showing 7 changed files with 257 additions and 0 deletions.
60 changes: 60 additions & 0 deletions lld/test/wasm/build-id.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Checks the build_id custom section that wasm-ld emits for each supported
# --build-id style, and that no such section is emitted when the ID is
# disabled.
# RUN: llvm-mc -filetype=obj -triple=wasm32 %p/Inputs/start.s -o %t

# Bare --build-id and --build-id=fast are expected to give identical bytes,
# with the default threading and with a single thread.
# RUN: wasm-ld --build-id %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
# RUN: wasm-ld --build-id=fast %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
# RUN: wasm-ld --build-id %t -o %t2 --threads=1
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s

# The sha1 style, with the default threading and with a single thread.
# RUN: wasm-ld --build-id=sha1 %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
# RUN: wasm-ld --build-id=sha1 %t -o %t2 --threads=1
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s

# The tree style is expected to produce exactly the sha1 output.
# RUN: wasm-ld --build-id=tree %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
# RUN: wasm-ld --build-id=tree %t -o %t2 --threads=1
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s

# The uuid style; only the leading size byte is checked (see the checks below).
# RUN: wasm-ld --build-id=uuid %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=UUID %s

# A user-supplied hex string is emitted verbatim after its size byte.
# RUN: wasm-ld --build-id=0x12345678 %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=HEX %s

# Without any build-id option there must be no build_id section.
# RUN: wasm-ld %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s

# When the option is repeated, the last occurrence on the command line wins.
# RUN: wasm-ld --build-id=sha1 --build-id=none %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
# RUN: wasm-ld --build-id --build-id=none %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
# RUN: wasm-ld --build-id=none --build-id %t -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s

# A small data symbol assembled together with the start.s input.
.section .data.foo,"",@
.globl foo
.hidden foo
.p2align 2
foo:
.int32 1
.size foo, 4


# In every dump below the first byte is the build ID length, followed by the
# ID bytes themselves. Here the length is 0x10 (16 bytes).
# DEFAULT: Contents of section build_id:
# DEFAULT-NEXT: 0000 10cdbf99 f76b1f5e ebb2f36a 1bde1d6c .....k.^...j...l
# DEFAULT-NEXT: 0010 01

# Length 0x14 (20 bytes) followed by the hash.
# SHA1: Contents of section build_id:
# SHA1-NEXT: 0000 14ad22e8 54d72438 94af85de 3c5592bd ..".T.$8....<U..
# SHA1-NEXT: 0010 1b5ec96f 6b .^.ok

# The ID bytes are random for this style, so only the length byte (0x10) is
# matched.
# UUID: Contents of section build_id:
# UUID-NEXT: 0000 10

# Length 0x04 followed by the four bytes given on the command line.
# HEX: Contents of section build_id:
# HEX-NEXT: 0000 04123456 78 ..4Vx


# NONE-NOT: Contents of section build_id:
5 changes: 5 additions & 0 deletions lld/wasm/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ namespace wasm {
// Policy selected by the --unresolved-symbols option.
enum class UnresolvedPolicy {
  ReportError,
  Warn,
  Ignore,
  ImportDynamic,
};

// Build ID style selected by --build-id / --build-id=<style>.
enum class BuildIdKind {
  None,      // No build ID (option absent, or --build-id=none).
  Fast,      // Bare --build-id or --build-id=fast.
  Sha1,      // --build-id=sha1 or --build-id=tree.
  Hexstring, // --build-id=0x<hexstring>: bytes supplied by the user.
  Uuid,      // --build-id=uuid.
};

// This struct contains the global configuration for the linker.
// Most fields are direct mapping from the command line options
// and such fields have the same name as the corresponding options.
Expand Down Expand Up @@ -72,6 +75,7 @@ struct Configuration {
llvm::StringRef thinLTOJobs;
bool ltoDebugPassManager;
UnresolvedPolicy unresolvedSymbols;
BuildIdKind buildId = BuildIdKind::None;

llvm::StringRef entry;
llvm::StringRef mapFile;
Expand All @@ -85,6 +89,7 @@ struct Configuration {
llvm::CachePruningPolicy thinLTOCachePolicy;
std::optional<std::vector<std::string>> features;
std::optional<std::vector<std::string>> extraFeatures;
llvm::SmallVector<uint8_t, 0> buildIdVector;

// The following config options do not directly correspond to any
// particular command line options.
Expand Down
29 changes: 29 additions & 0 deletions lld/wasm/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,33 @@ static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &args) {
return errorOrWarn;
}

// Determine the build ID style requested on the command line. Only the last
// --build-id / --build-id=<style> occurrence is considered. "tree" is accepted
// as a synonym for "sha1" because all our hash functions, including
// --build-id=sha1, are actually tree hashes for performance reasons.
//
// Returns the selected kind together with the bytes parsed from a
// 0x<hexstring> argument (empty for every other style). An unrecognized style
// reports an error and yields BuildIdKind::None.
static std::pair<BuildIdKind, SmallVector<uint8_t, 0>>
getBuildId(opt::InputArgList &args) {
  auto *lastArg = args.getLastArg(OPT_build_id, OPT_build_id_eq);
  if (!lastArg)
    return {BuildIdKind::None, {}};

  // The bare flag carries no value and selects the fast style.
  if (lastArg->getOption().getID() == OPT_build_id)
    return {BuildIdKind::Fast, {}};

  const StringRef style = lastArg->getValue();

  // A user-supplied ID is the only form that carries a payload.
  if (style.startswith("0x"))
    return {BuildIdKind::Hexstring, parseHex(style.substr(2))};

  BuildIdKind kind = BuildIdKind::None;
  if (style == "fast")
    kind = BuildIdKind::Fast;
  else if (style == "sha1" || style == "tree")
    kind = BuildIdKind::Sha1;
  else if (style == "uuid")
    kind = BuildIdKind::Uuid;
  else if (style != "none")
    error("unknown --build-id style: " + style);
  return {kind, {}};
}

// Initializes Config members by the command line options.
static void readConfigs(opt::InputArgList &args) {
config->bsymbolic = args.hasArg(OPT_Bsymbolic);
Expand Down Expand Up @@ -519,6 +546,8 @@ static void readConfigs(opt::InputArgList &args) {

if (args.hasArg(OPT_print_map))
config->mapFile = "-";

std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
}

// Some Config members do not directly correspond to any particular
Expand Down
5 changes: 5 additions & 0 deletions lld/wasm/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">

def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;

// Bare --build-id; the driver handles it the same way as --build-id=fast.
def build_id: F<"build-id">, HelpText<"Alias for --build-id=fast">;

// --build-id=<style>. Besides the styles listed in the meta-variable below,
// the driver also accepts "tree" (handled as sha1) and "none" (no build ID).
def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">,
MetaVarName<"[fast,sha1,uuid,0x<hexstring>]">;

defm color_diagnostics: B<"color-diagnostics",
"Alias for --color-diagnostics=always",
"Alias for --color-diagnostics=never">;
Expand Down
34 changes: 34 additions & 0 deletions lld/wasm/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,5 +888,39 @@ void RelocSection::writeBody() {
sec->writeRelocations(bodyOutputStream);
}

// Returns the number of build ID bytes for the configured --build-id style:
// 16 for the fast and uuid styles, 20 for sha1, the length of the
// user-supplied byte string for the hexstring style, and 0 when no build ID
// is requested.
static size_t getHashSize() {
  switch (config->buildId) {
  case BuildIdKind::Fast:
  case BuildIdKind::Uuid:
    return 16;
  case BuildIdKind::Sha1:
    return 20;
  case BuildIdKind::Hexstring:
    return config->buildIdVector.size();
  case BuildIdKind::None:
    break;
  }
  // Reached for BuildIdKind::None. Returning here, outside the switch, also
  // guarantees that control never flows off the end of this non-void function
  // (undefined behavior, and a warning on several compilers) while keeping
  // the switch free of a default label so unhandled enumerators are still
  // diagnosed.
  return 0;
}

// Creates the build ID custom section. The ID length is computed once here
// from the configured --build-id style and stored in the const member
// hashSize.
BuildIdSection::BuildIdSection()
    : SyntheticSection(llvm::wasm::WASM_SEC_CUSTOM, buildIdSectionName),
      hashSize(static_cast<uint32_t>(getHashSize())) {}

// Emits the section payload: the ULEB128-encoded build ID length followed by
// hashSize placeholder bytes (ASCII spaces). The real ID is not known yet at
// this point; writeBuildId() stores it once it has been computed.
void BuildIdSection::writeBody() {
  LLVM_DEBUG(llvm::dbgs() << "BuildId writebody\n");
  writeUleb128(bodyOutputStream, hashSize, "build id size");

  // Reserve room for the ID itself.
  const std::vector<char> placeholder(hashSize, ' ');
  writeBytes(bodyOutputStream, placeholder.data(), hashSize, "placeholder");
}

// Copies the final build ID over the placeholder bytes at the location that
// writeTo() recorded in hashPlaceholderPtr. `buf` must hold exactly hashSize
// bytes.
void BuildIdSection::writeBuildId(llvm::ArrayRef<uint8_t> buf) {
  assert(buf.size() == hashSize);
  LLVM_DEBUG(llvm::dbgs() << "buildid write " << buf.size() << " "
                          << hashPlaceholderPtr << '\n');
  const uint8_t *idBytes = buf.data();
  memcpy(hashPlaceholderPtr, idBytes, hashSize);
}

} // namespace wasm
} // namespace lld
30 changes: 30 additions & 0 deletions lld/wasm/SyntheticSections.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,35 @@ class RelocSection : public SyntheticSection {
OutputSection *sec;
};

// Custom section holding the build ID. The ID is derived from the contents
// of the output, so the section is first written with placeholder bytes and
// the final ID is stored afterwards through writeBuildId().
class BuildIdSection : public SyntheticSection {
public:
  BuildIdSection();

  // Writes the ID length followed by placeholder bytes for the ID.
  void writeBody() override;

  // The section is emitted only when a build ID style was requested.
  bool isNeeded() const override {
    return config->buildId != BuildIdKind::None;
  }

  // Stores the final ID (exactly hashSize bytes) over the placeholder. Must
  // be called after writeTo(), which records where the placeholder lives.
  void writeBuildId(llvm::ArrayRef<uint8_t> buf);

  void writeTo(uint8_t *buf) override {
    LLVM_DEBUG(llvm::dbgs()
               << "BuildId writeto buf " << buf << " offset " << offset
               << " headersize " << header.size() << '\n');
    // The actual build ID is derived from a hash of all of the output
    // sections, so it can't be calculated until they are written. Here
    // we write the section with placeholder bytes in place of the hash.
    SyntheticSection::writeTo(buf);

    // Number of bytes writeBody() needed for the ULEB128-encoded hash size:
    // one byte per started group of 7 bits. A user-supplied hex string of 128
    // bytes or more needs more than one byte, so this cannot be assumed to
    // be 1. (Assumes writeUleb128 emits the minimal encoding.)
    size_t hashSizeFieldLen = 1;
    for (uint32_t rest = hashSize >> 7; rest != 0; rest >>= 7)
      ++hashSizeFieldLen;

    // Calculate and store the location where the hash will be written.
    // sizeof(buildIdSectionName) is the 8 name characters plus the trailing
    // NUL. NOTE(review): that extra byte appears to stand in for the one-byte
    // length prefix of the name string; confirm against SyntheticSection.
    hashPlaceholderPtr = buf + offset + header.size() +
                         sizeof(buildIdSectionName) /*name string*/ +
                         hashSizeFieldLen /* hash size */;
  }

  // Length of the build ID in bytes; fixed at construction.
  const uint32_t hashSize;

private:
  static constexpr char buildIdSectionName[] = "build_id";
  // Location of the placeholder bytes in the output buffer; set by writeTo().
  uint8_t *hashPlaceholderPtr = nullptr;
};

// Linker generated output sections
struct OutStruct {
DylinkSection *dylinkSec;
Expand All @@ -447,6 +476,7 @@ struct OutStruct {
NameSection *nameSec;
ProducersSection *producersSec;
TargetFeaturesSection *targetFeaturesSec;
BuildIdSection *buildIdSec;
};

extern OutStruct out;
Expand Down
94 changes: 94 additions & 0 deletions lld/wasm/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
#include "SymbolTable.h"
#include "SyntheticSections.h"
#include "WriterUtils.h"
#include "lld/Common/Arrays.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
Expand All @@ -30,6 +32,9 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/xxhash.h"

#include <cstdarg>
#include <map>
Expand Down Expand Up @@ -103,6 +108,7 @@ class Writer {

void writeHeader();
void writeSections();
void writeBuildId();

uint64_t fileSize = 0;

Expand Down Expand Up @@ -219,6 +225,91 @@ void Writer::writeSections() {
});
}

// Hashes `data` into `hashBuf` with `hashFn`, as a two-level hash so that
// multiple cores can be used: the input is split into 1MB pieces, each piece
// is hashed in parallel into its own slot of a scratch buffer, and the
// concatenation of those per-piece hashes is hashed once more to produce the
// final value. `hashFn` is expected to write hashBuf.size() bytes at `dest`.
static void
computeHash(llvm::MutableArrayRef<uint8_t> hashBuf,
            llvm::ArrayRef<uint8_t> data,
            std::function<void(uint8_t *dest, ArrayRef<uint8_t> arr)> hashFn) {
  const size_t digestSize = hashBuf.size();
  const std::vector<ArrayRef<uint8_t>> pieces = split(data, 1024 * 1024);
  std::vector<uint8_t> digests(pieces.size() * digestSize);

  // First level: one hash per piece, each written to its own slot.
  parallelFor(0, pieces.size(), [&](size_t idx) {
    uint8_t *slot = digests.data() + idx * digestSize;
    hashFn(slot, pieces[idx]);
  });

  // Second level: hash of the per-piece hashes goes to the output buffer.
  hashFn(hashBuf.data(), digests);
}

// Fills `output` (which must be 16 bytes) with a UUID of the given version.
//
// version 5: the ID is the first 16 bytes of SHA1(namespace UUID + fileHash).
// version 4: the ID is filled with random bytes and `fileHash` is not used;
//            an error is reported if the entropy source fails.
//
// In both cases the version and variant fields are then stamped into the
// result.
static void makeUUID(unsigned version, llvm::ArrayRef<uint8_t> fileHash,
                     llvm::MutableArrayRef<uint8_t> output) {
  // Parenthesized so the message applies to the whole condition rather than
  // relying on && binding tighter than ||.
  assert((version == 4 || version == 5) && "Unknown UUID version");
  assert(output.size() == 16 && "Wrong size for UUID output");
  if (version == 5) {
    // Build a valid v5 UUID from a hardcoded (randomly-generated) namespace
    // UUID, and the computed hash of the output.
    std::array<uint8_t, 16> namespaceUUID{0xA1, 0xFA, 0x48, 0x2D, 0x0E, 0x22,
                                          0x03, 0x8D, 0x33, 0x8B, 0x52, 0x1C,
                                          0xD6, 0xD2, 0x12, 0xB2};
    SHA1 sha;
    sha.update(namespaceUUID);
    sha.update(fileHash);
    auto s = sha.final();
    // Copy through the raw buffers: only the leading output.size() bytes of
    // the SHA1 result are kept.
    std::copy(s.data(), s.data() + output.size(), output.data());
  } else if (version == 4) {
    if (auto ec = llvm::getRandomBytes(output.data(), output.size()))
      error("entropy source failure: " + ec.message());
  }
  // Set the UUID version and variant fields.
  // The version is the upper nibble of byte 6 (0b0101xxxx or 0b0100xxxx)
  output[6] = (static_cast<uint8_t>(version) << 4) | (output[6] & 0xF);

  // The variant is DCE 1.1/ISO 11578 (0b10xxxxxx)
  output[8] &= 0xBF;
  output[8] |= 0x80;
}

void Writer::writeBuildId() {
if (!out.buildIdSec->isNeeded())
return;
if (config->buildId == BuildIdKind::Hexstring) {
out.buildIdSec->writeBuildId(config->buildIdVector);
return;
}

// Compute a hash of all sections of the output file.
size_t hashSize = out.buildIdSec->hashSize;
std::vector<uint8_t> buildId(hashSize);
llvm::ArrayRef<uint8_t> buf{buffer->getBufferStart(), size_t(fileSize)};

switch (config->buildId) {
case BuildIdKind::Fast: {
std::vector<uint8_t> fileHash(8);
computeHash(fileHash, buf, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
support::endian::write64le(dest, xxHash64(arr));
});
makeUUID(5, fileHash, buildId);
break;
}
case BuildIdKind::Sha1:
computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
memcpy(dest, SHA1::hash(arr).data(), hashSize);
});
break;
case BuildIdKind::Uuid:
makeUUID(4, {}, buildId);
break;
default:
llvm_unreachable("unknown BuildIdKind");
}
out.buildIdSec->writeBuildId(buildId);
}

static void setGlobalPtr(DefinedGlobal *g, uint64_t memoryPtr) {
LLVM_DEBUG(dbgs() << "setGlobalPtr " << g->getName() << " -> " << memoryPtr << "\n");
g->global->setPointerValue(memoryPtr);
Expand Down Expand Up @@ -456,6 +547,7 @@ void Writer::addSections() {
addSection(out.nameSec);
addSection(out.producersSec);
addSection(out.targetFeaturesSec);
addSection(out.buildIdSec);
}

void Writer::finalizeSections() {
Expand Down Expand Up @@ -1577,6 +1669,7 @@ void Writer::createSyntheticSections() {
out.elemSec = make<ElemSection>();
out.producersSec = make<ProducersSection>();
out.targetFeaturesSec = make<TargetFeaturesSection>();
out.buildIdSec = make<BuildIdSection>();
}

void Writer::createSyntheticSectionsPostLayout() {
Expand Down Expand Up @@ -1738,6 +1831,7 @@ void Writer::run() {

log("-- writeSections");
writeSections();
writeBuildId();
if (errorCount())
return;

Expand Down

0 comments on commit c7af9ae

Please sign in to comment.