Skip to content

Commit

Permalink
[clang][serialization] Blobify IMPORTS strings and signatures (llvm#1…
Browse files Browse the repository at this point in the history
…16095)

This PR changes part of the PCM format to store string-like data (module
names, file paths, and signatures) in the blob attached to a record instead
of VBR6-encoding it into the record itself. Applied to the `IMPORTS` section
(which is very hot), this speeds up dependency scanning by 2.8%.
  • Loading branch information
jansvoboda11 authored Nov 18, 2024
1 parent f14e1a8 commit b769e35
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 186 deletions.
7 changes: 3 additions & 4 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
const unsigned VERSION_MAJOR = 33;
const unsigned VERSION_MAJOR = 34;

/// AST file minor version number supported by this version of
/// Clang.
Expand Down Expand Up @@ -350,9 +350,8 @@ enum ControlRecordTypes {
/// and information about the compiler used to build this AST file.
METADATA = 1,

/// Record code for the list of other AST files imported by
/// this AST file.
IMPORTS,
/// Record code for another AST file imported by this AST file.
IMPORT,

/// Record code for the original file that was used to
/// generate the AST file, including both its file ID and its
Expand Down
14 changes: 4 additions & 10 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -2389,23 +2389,17 @@ class ASTReader

// Read a string
static std::string ReadString(const RecordDataImpl &Record, unsigned &Idx);

// Skip a string
static void SkipString(const RecordData &Record, unsigned &Idx) {
Idx += Record[Idx] + 1;
}
// Read a string whose length is stored in Record[Idx] and whose bytes are
// the leading bytes of Blob. Advances Idx past the length and updates Blob
// to refer to the data that follows the string.
static StringRef ReadStringBlob(const RecordDataImpl &Record, unsigned &Idx,
StringRef &Blob);

// Read a path
std::string ReadPath(ModuleFile &F, const RecordData &Record, unsigned &Idx);

// Read a path
std::string ReadPath(StringRef BaseDirectory, const RecordData &Record,
unsigned &Idx);

// Skip a path
static void SkipPath(const RecordData &Record, unsigned &Idx) {
SkipString(Record, Idx);
}
// Read a path stored as a blob string (see ReadStringBlob) and pass it,
// together with BaseDirectory, to ResolveImportedPathAndAllocate. Advances
// Idx and Blob past the consumed data.
std::string ReadPathBlob(StringRef BaseDirectory, const RecordData &Record,
unsigned &Idx, StringRef &Blob);

/// Read a version tuple.
static VersionTuple ReadVersionTuple(const RecordData &Record, unsigned &Idx);
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -769,13 +769,17 @@ class ASTWriter : public ASTDeserializationListener,

/// Add a string to the given record.
void AddString(StringRef Str, RecordDataImpl &Record);
/// Add a string using the given record and blob.
/// NOTE(review): presumably the writer-side counterpart of
/// ASTReader::ReadStringBlob (length recorded in \p Record, bytes appended
/// to \p Blob) -- confirm against the definition in ASTWriter.cpp.
void AddStringBlob(StringRef Str, RecordDataImpl &Record,
SmallVectorImpl<char> &Blob);

/// Convert a path from this build process into one that is appropriate
/// for emission in the module file.
bool PreparePathForOutput(SmallVectorImpl<char> &Path);

/// Add a path to the given record.
void AddPath(StringRef Path, RecordDataImpl &Record);
/// Add a path using the given record and blob.
/// NOTE(review): presumably the writer-side counterpart of
/// ASTReader::ReadPathBlob, storing the path bytes in \p Blob rather than in
/// \p Record -- confirm against the definition in ASTWriter.cpp.
void AddPathBlob(StringRef Str, RecordDataImpl &Record,
SmallVectorImpl<char> &Blob);

/// Emit the current record with the given path as a blob.
void EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record,
Expand Down
224 changes: 120 additions & 104 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3092,98 +3092,97 @@ ASTReader::ReadControlBlock(ModuleFile &F,
break;
}

case IMPORTS: {
case IMPORT: {
// Validate the AST before processing any imports (otherwise, untangling
// them can be error-prone and expensive). A module will have a name and
// will already have been validated, but this catches the PCH case.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;

// Load each of the imported PCH files.
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
// Read information about the AST file.
ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
// Whether we're importing a standard c++ module.
bool IsImportingStdCXXModule = Record[Idx++];
// The import location will be the local one for now; we will adjust
// all import locations of module imports after the global source
// location info are setup, in ReadAST.
auto [ImportLoc, ImportModuleFileIndex] =
ReadUntranslatedSourceLocation(Record[Idx++]);
// The import location must belong to the current module file itself.
assert(ImportModuleFileIndex == 0);
off_t StoredSize = !IsImportingStdCXXModule ? (off_t)Record[Idx++] : 0;
time_t StoredModTime =
!IsImportingStdCXXModule ? (time_t)Record[Idx++] : 0;

ASTFileSignature StoredSignature;
if (!IsImportingStdCXXModule) {
auto FirstSignatureByte = Record.begin() + Idx;
StoredSignature = ASTFileSignature::create(
FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
Idx += ASTFileSignature::size;
}
unsigned Idx = 0;
// Read information about the AST file.
ModuleKind ImportedKind = (ModuleKind)Record[Idx++];

// The import location will be the local one for now; we will adjust
// all import locations of module imports after the global source
// location info are setup, in ReadAST.
auto [ImportLoc, ImportModuleFileIndex] =
ReadUntranslatedSourceLocation(Record[Idx++]);
// The import location must belong to the current module file itself.
assert(ImportModuleFileIndex == 0);

StringRef ImportedName = ReadStringBlob(Record, Idx, Blob);

bool IsImportingStdCXXModule = Record[Idx++];

off_t StoredSize = 0;
time_t StoredModTime = 0;
ASTFileSignature StoredSignature;
std::string ImportedFile;

// For prebuilt and explicit modules first consult the file map for
// an override. Note that here we don't search prebuilt module
// directories if we're not importing standard c++ module, only the
// explicit name to file mappings. Also, we will still verify the
// size/signature making sure it is essentially the same file but
// perhaps in a different location.
if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
ImportedName, /*FileMapOnly*/ !IsImportingStdCXXModule);

if (IsImportingStdCXXModule && ImportedFile.empty()) {
Diag(diag::err_failed_to_find_module_file) << ImportedName;
return Missing;
}

std::string ImportedName = ReadString(Record, Idx);
std::string ImportedFile;

// For prebuilt and explicit modules first consult the file map for
// an override. Note that here we don't search prebuilt module
// directories if we're not importing standard c++ module, only the
// explicit name to file mappings. Also, we will still verify the
// size/signature making sure it is essentially the same file but
// perhaps in a different location.
if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
ImportedName, /*FileMapOnly*/ !IsImportingStdCXXModule);

// For C++20 Modules, we won't record the path to the imported modules
// in the BMI
if (!IsImportingStdCXXModule) {
if (ImportedFile.empty()) {
// Use BaseDirectoryAsWritten to ensure we use the same path in the
// ModuleCache as when writing.
ImportedFile = ReadPath(BaseDirectoryAsWritten, Record, Idx);
} else
SkipPath(Record, Idx);
} else if (ImportedFile.empty()) {
Diag(clang::diag::err_failed_to_find_module_file) << ImportedName;
return Missing;
}
if (!IsImportingStdCXXModule) {
StoredSize = (off_t)Record[Idx++];
StoredModTime = (time_t)Record[Idx++];

// If our client can't cope with us being out of date, we can't cope with
// our dependency being missing.
unsigned Capabilities = ClientLoadCapabilities;
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Capabilities &= ~ARR_Missing;

// Load the AST file.
auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
Loaded, StoredSize, StoredModTime,
StoredSignature, Capabilities);

// If we diagnosed a problem, produce a backtrace.
bool recompilingFinalized =
Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
getModuleManager().getModuleCache().isPCMFinal(F.FileName);
if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
Diag(diag::note_module_file_imported_by)
<< F.FileName << !F.ModuleName.empty() << F.ModuleName;
if (recompilingFinalized)
Diag(diag::note_module_file_conflict);

switch (Result) {
case Failure: return Failure;
// If we have to ignore the dependency, we'll have to ignore this too.
case Missing:
case OutOfDate: return OutOfDate;
case VersionMismatch: return VersionMismatch;
case ConfigurationMismatch: return ConfigurationMismatch;
case HadErrors: return HadErrors;
case Success: break;
StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size);
StoredSignature = ASTFileSignature::create(SignatureBytes.begin(),
SignatureBytes.end());
Blob = Blob.substr(ASTFileSignature::size);

if (ImportedFile.empty()) {
// Use BaseDirectoryAsWritten to ensure we use the same path in the
// ModuleCache as when writing.
ImportedFile =
ReadPathBlob(BaseDirectoryAsWritten, Record, Idx, Blob);
}
}

// If our client can't cope with us being out of date, we can't cope with
// our dependency being missing.
unsigned Capabilities = ClientLoadCapabilities;
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Capabilities &= ~ARR_Missing;

// Load the AST file.
auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
Loaded, StoredSize, StoredModTime,
StoredSignature, Capabilities);

// If we diagnosed a problem, produce a backtrace.
bool recompilingFinalized =
Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
getModuleManager().getModuleCache().isPCMFinal(F.FileName);
if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
Diag(diag::note_module_file_imported_by)
<< F.FileName << !F.ModuleName.empty() << F.ModuleName;
if (recompilingFinalized)
Diag(diag::note_module_file_conflict);

switch (Result) {
case Failure: return Failure;
// If we have to ignore the dependency, we'll have to ignore this too.
case Missing:
case OutOfDate: return OutOfDate;
case VersionMismatch: return VersionMismatch;
case ConfigurationMismatch: return ConfigurationMismatch;
case HadErrors: return HadErrors;
case Success: break;
}
break;
}

Expand Down Expand Up @@ -5624,36 +5623,38 @@ bool ASTReader::readASTFileControlBlock(
break;
}

case IMPORTS: {
case IMPORT: {
if (!NeedsImports)
break;

unsigned Idx = 0, N = Record.size();
while (Idx < N) {
// Read information about the AST file.
unsigned Idx = 0;
// Read information about the AST file.

// Skip Kind
Idx++;

// Skip Kind
Idx++;
bool IsStandardCXXModule = Record[Idx++];
// Skip ImportLoc
Idx++;

// Skip ImportLoc
Idx++;
StringRef ModuleName = ReadStringBlob(Record, Idx, Blob);

// In C++20 Modules, we don't record the path to imported
// modules in the BMI files.
if (IsStandardCXXModule) {
std::string ModuleName = ReadString(Record, Idx);
Listener.visitImport(ModuleName, /*Filename=*/"");
continue;
}
bool IsStandardCXXModule = Record[Idx++];

// Skip Size, ModTime and Signature
Idx += 1 + 1 + ASTFileSignature::size;
std::string ModuleName = ReadString(Record, Idx);
std::string FilenameStr = ReadString(Record, Idx);
auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
Listener.visitImport(ModuleName, *Filename);
// In C++20 Modules, we don't record the path to imported
// modules in the BMI files.
if (IsStandardCXXModule) {
Listener.visitImport(ModuleName, /*Filename=*/"");
continue;
}

// Skip Size and ModTime.
Idx += 1 + 1;
// Skip signature.
Blob = Blob.substr(ASTFileSignature::size);

StringRef FilenameStr = ReadStringBlob(Record, Idx, Blob);
auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
Listener.visitImport(ModuleName, *Filename);
break;
}

Expand Down Expand Up @@ -9602,6 +9603,14 @@ std::string ASTReader::ReadString(const RecordDataImpl &Record, unsigned &Idx) {
return Result;
}

/// Read a length-prefixed string whose bytes live in \p Blob.
///
/// The length is taken from \p Record at position \p Idx, and \p Idx is
/// advanced past it. The result is the leading run of that many bytes of
/// \p Blob; \p Blob is then updated to refer to whatever follows them.
StringRef ASTReader::ReadStringBlob(const RecordDataImpl &Record, unsigned &Idx,
                                    StringRef &Blob) {
  const unsigned Length = Record[Idx];
  ++Idx;

  // Split the blob: the front piece is the string, the rest stays pending.
  const StringRef Str = Blob.substr(0, Length);
  Blob = Blob.substr(Length);
  return Str;
}

std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
unsigned &Idx) {
return ReadPath(F.BaseDirectory, Record, Idx);
Expand All @@ -9613,6 +9622,13 @@ std::string ASTReader::ReadPath(StringRef BaseDirectory,
return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
}

/// Read a path stored as a length-prefixed string in \p Blob (see
/// ReadStringBlob) and hand it to ResolveImportedPathAndAllocate together
/// with \p BaseDirectory. Advances \p Idx and \p Blob past the consumed data.
std::string ASTReader::ReadPathBlob(StringRef BaseDirectory,
                                    const RecordData &Record, unsigned &Idx,
                                    StringRef &Blob) {
  return ResolveImportedPathAndAllocate(
      PathBuf, ReadStringBlob(Record, Idx, Blob), BaseDirectory);
}

VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
unsigned &Idx) {
unsigned Major = Record[Idx++];
Expand Down
Loading

0 comments on commit b769e35

Please sign in to comment.