Skip to content

Commit

Permalink
Merge bitcoin/bitcoin#30884: streams: cache file position within Auto…
Browse files Browse the repository at this point in the history
…File

a240e15 streams: remove AutoFile::Get() entirely (Pieter Wuille)
e624a9b streams: cache file position within AutoFile (Pieter Wuille)

Pull request description:

  Fixes #30833.

  Instead of relying on frequent `ftell` calls (which appear to cause a significant slowdown on some systems) in XOR-enabled `AutoFile`s, cache the file position within `AutoFile` itself.

ACKs for top commit:
  achow101:
    ACK a240e15
  davidgumberg:
    untested reACK bitcoin/bitcoin@a240e15
  theStack:
    Code-review ACK a240e15

Tree-SHA512: fd3681edc018afaf955dc7a41a0c953ca80d46c1129e3c5b306c87c95aae93b2fe7b900794eb8b6f10491f9211645e7939918a28838295e6873eb226fca7006f
  • Loading branch information
achow101 committed Sep 17, 2024
2 parents 06329eb + a240e15 commit 9f1aa88
Show file tree
Hide file tree
Showing 12 changed files with 75 additions and 49 deletions.
2 changes: 1 addition & 1 deletion src/addrdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
remove(pathTmp);
return false;
}
if (!FileCommit(fileout.Get())) {
if (!fileout.Commit()) {
fileout.fclose();
remove(pathTmp);
LogError("%s: Failed to flush file %s\n", __func__, fs::PathToString(pathTmp));
Expand Down
2 changes: 1 addition & 1 deletion src/bench/streams_findbyte.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ static void FindByte(benchmark::Bench& bench)
uint8_t data[file_size] = {0};
data[file_size-1] = 1;
file << data;
std::rewind(file.Get());
file.seek(0, SEEK_SET);
BufferedFile bf{file, /*nBufSize=*/file_size + 1, /*nRewindIn=*/file_size};

bench.run([&] {
Expand Down
6 changes: 3 additions & 3 deletions src/index/blockfilterindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ bool BlockFilterIndex::CustomCommit(CDBBatch& batch)
LogError("%s: Failed to open filter file %d\n", __func__, pos.nFile);
return false;
}
if (!FileCommit(file.Get())) {
if (!file.Commit()) {
LogError("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
return false;
}
Expand Down Expand Up @@ -201,11 +201,11 @@ size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter&
LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
return 0;
}
if (!TruncateFile(last_file.Get(), pos.nPos)) {
if (!last_file.Truncate(pos.nPos)) {
LogPrintf("%s: Failed to truncate filter file %d\n", __func__, pos.nFile);
return 0;
}
if (!FileCommit(last_file.Get())) {
if (!last_file.Commit()) {
LogPrintf("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
return 0;
}
Expand Down
5 changes: 1 addition & 4 deletions src/index/txindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,7 @@ bool TxIndex::FindTx(const uint256& tx_hash, uint256& block_hash, CTransactionRe
CBlockHeader header;
try {
file >> header;
if (fseek(file.Get(), postx.nTxOffset, SEEK_CUR)) {
LogError("%s: fseek(...) failed\n", __func__);
return false;
}
file.seek(postx.nTxOffset, SEEK_CUR);
file >> TX_WITH_WITNESS(tx);
} catch (const std::exception& e) {
LogError("%s: Deserialize or I/O error - %s\n", __func__, e.what());
Expand Down
4 changes: 2 additions & 2 deletions src/node/blockstorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ bool BlockManager::UndoWriteToDisk(const CBlockUndo& blockundo, FlatFilePos& pos
fileout << GetParams().MessageStart() << nSize;

// Write undo data
long fileOutPos = ftell(fileout.Get());
long fileOutPos = fileout.tell();
if (fileOutPos < 0) {
LogError("%s: ftell failed\n", __func__);
return false;
Expand Down Expand Up @@ -981,7 +981,7 @@ bool BlockManager::WriteBlockToDisk(const CBlock& block, FlatFilePos& pos) const
fileout << GetParams().MessageStart() << nSize;

// Write block
long fileOutPos = ftell(fileout.Get());
long fileOutPos = fileout.tell();
if (fileOutPos < 0) {
LogError("%s: ftell failed\n", __func__);
return false;
Expand Down
4 changes: 2 additions & 2 deletions src/node/mempool_persist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ bool DumpMempool(const CTxMemPool& pool, const fs::path& dump_path, FopenFn mock
LogInfo("Writing %d unbroadcast transactions to file.\n", unbroadcast_txids.size());
file << unbroadcast_txids;

if (!skip_file_commit && !FileCommit(file.Get()))
throw std::runtime_error("FileCommit failed");
if (!skip_file_commit && !file.Commit())
throw std::runtime_error("Commit failed");
file.fclose();
if (!RenameOver(dump_path + ".new", dump_path)) {
throw std::runtime_error("Rename failed");
Expand Down
6 changes: 4 additions & 2 deletions src/node/utxo_snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,11 @@ std::optional<uint256> ReadSnapshotBaseBlockhash(fs::path chaindir)
}
afile >> base_blockhash;

if (std::fgetc(afile.Get()) != EOF) {
int64_t position = afile.tell();
afile.seek(0, SEEK_END);
if (position != afile.tell()) {
LogPrintf("[snapshot] warning: unexpected trailing data in %s\n", read_from_str);
} else if (std::ferror(afile.Get())) {
} else if (afile.IsError()) {
LogPrintf("[snapshot] warning: i/o error reading %s\n", read_from_str);
}
return base_blockhash;
Expand Down
69 changes: 49 additions & 20 deletions src/streams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,29 @@

#include <span.h>
#include <streams.h>
#include <util/fs_helpers.h>

#include <array>

AutoFile::AutoFile(std::FILE* file, std::vector<std::byte> data_xor)
: m_file{file}, m_xor{std::move(data_xor)}
{
if (!IsNull()) {
auto pos{std::ftell(m_file)};
if (pos >= 0) m_position = pos;
}
}

std::size_t AutoFile::detail_fread(Span<std::byte> dst)
{
if (!m_file) throw std::ios_base::failure("AutoFile::read: file handle is nullptr");
if (m_xor.empty()) {
return std::fread(dst.data(), 1, dst.size(), m_file);
} else {
const auto init_pos{std::ftell(m_file)};
if (init_pos < 0) throw std::ios_base::failure("AutoFile::read: ftell failed");
std::size_t ret{std::fread(dst.data(), 1, dst.size(), m_file)};
util::Xor(dst.subspan(0, ret), m_xor, init_pos);
return ret;
size_t ret = std::fread(dst.data(), 1, dst.size(), m_file);
if (!m_xor.empty()) {
if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::read: position unknown");
util::Xor(dst.subspan(0, ret), m_xor, *m_position);
}
if (m_position.has_value()) *m_position += ret;
return ret;
}

void AutoFile::seek(int64_t offset, int origin)
Expand All @@ -29,18 +37,23 @@ void AutoFile::seek(int64_t offset, int origin)
if (std::fseek(m_file, offset, origin) != 0) {
throw std::ios_base::failure(feof() ? "AutoFile::seek: end of file" : "AutoFile::seek: fseek failed");
}
if (origin == SEEK_SET) {
m_position = offset;
} else if (origin == SEEK_CUR && m_position.has_value()) {
*m_position += offset;
} else {
int64_t r{std::ftell(m_file)};
if (r < 0) {
throw std::ios_base::failure("AutoFile::seek: ftell failed");
}
m_position = r;
}
}

int64_t AutoFile::tell()
{
if (IsNull()) {
throw std::ios_base::failure("AutoFile::tell: file handle is nullptr");
}
int64_t r{std::ftell(m_file)};
if (r < 0) {
throw std::ios_base::failure("AutoFile::tell: ftell failed");
}
return r;
if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::tell: position unknown");
return *m_position;
}

void AutoFile::read(Span<std::byte> dst)
Expand All @@ -60,6 +73,7 @@ void AutoFile::ignore(size_t nSize)
throw std::ios_base::failure(feof() ? "AutoFile::ignore: end of file" : "AutoFile::ignore: fread failed");
}
nSize -= nNow;
if (m_position.has_value()) *m_position += nNow;
}
}

Expand All @@ -70,19 +84,34 @@ void AutoFile::write(Span<const std::byte> src)
if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) {
throw std::ios_base::failure("AutoFile::write: write failed");
}
if (m_position.has_value()) *m_position += src.size();
} else {
auto current_pos{std::ftell(m_file)};
if (current_pos < 0) throw std::ios_base::failure("AutoFile::write: ftell failed");
if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::write: position unknown");
std::array<std::byte, 4096> buf;
while (src.size() > 0) {
auto buf_now{Span{buf}.first(std::min<size_t>(src.size(), buf.size()))};
std::copy(src.begin(), src.begin() + buf_now.size(), buf_now.begin());
util::Xor(buf_now, m_xor, current_pos);
util::Xor(buf_now, m_xor, *m_position);
if (std::fwrite(buf_now.data(), 1, buf_now.size(), m_file) != buf_now.size()) {
throw std::ios_base::failure{"XorFile::write: failed"};
}
src = src.subspan(buf_now.size());
current_pos += buf_now.size();
*m_position += buf_now.size();
}
}
}

bool AutoFile::Commit()
{
return ::FileCommit(m_file);
}

bool AutoFile::IsError()
{
return ferror(m_file);
}

bool AutoFile::Truncate(unsigned size)
{
return ::TruncateFile(m_file, size);
}
13 changes: 6 additions & 7 deletions src/streams.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,10 @@ class AutoFile
protected:
std::FILE* m_file;
std::vector<std::byte> m_xor;
std::optional<int64_t> m_position;

public:
explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={}) : m_file{file}, m_xor{std::move(data_xor)} {}
explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={});

~AutoFile() { fclose(); }

Expand All @@ -419,12 +420,6 @@ class AutoFile
return ret;
}

/** Get wrapped FILE* without transfer of ownership.
* @note Ownership of the FILE* will remain with this class. Use this only if the scope of the
* AutoFile outlives use of the passed pointer.
*/
std::FILE* Get() const { return m_file; }

/** Return true if the wrapped FILE* is nullptr, false otherwise.
*/
bool IsNull() const { return m_file == nullptr; }
Expand Down Expand Up @@ -458,6 +453,10 @@ class AutoFile
::Unserialize(*this, obj);
return *this;
}

bool Commit();
bool IsError();
bool Truncate(unsigned size);
};

/** Wrapper around an AutoFile& that implements a ring buffer to
Expand Down
1 change: 0 additions & 1 deletion src/test/fuzz/autofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ FUZZ_TARGET(autofile)
WriteToStream(fuzzed_data_provider, auto_file);
});
}
(void)auto_file.Get();
(void)auto_file.IsNull();
if (fuzzed_data_provider.ConsumeBool()) {
FILE* f = auto_file.release();
Expand Down
6 changes: 3 additions & 3 deletions src/test/streams_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
for (uint8_t j = 0; j < 40; ++j) {
file << j;
}
std::rewind(file.Get());
file.seek(0, SEEK_SET);

// The buffer size (second arg) must be greater than the rewind
// amount (third arg).
Expand Down Expand Up @@ -391,7 +391,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_skip)
for (uint8_t j = 0; j < 40; ++j) {
file << j;
}
std::rewind(file.Get());
file.seek(0, SEEK_SET);

// The buffer is 25 bytes, allow rewinding 10 bytes.
BufferedFile bf{file, 25, 10};
Expand Down Expand Up @@ -444,7 +444,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
for (uint8_t i = 0; i < fileSize; ++i) {
file << i;
}
std::rewind(file.Get());
file.seek(0, SEEK_SET);

size_t bufSize = m_rng.randrange(300) + 1;
size_t rewindSize = m_rng.randrange(bufSize);
Expand Down
6 changes: 3 additions & 3 deletions src/util/asmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,10 @@ std::vector<bool> DecodeAsmap(fs::path path)
LogPrintf("Failed to open asmap file from disk\n");
return bits;
}
fseek(filestr, 0, SEEK_END);
int length = ftell(filestr);
file.seek(0, SEEK_END);
int length = file.tell();
LogPrintf("Opened asmap file %s (%d bytes) from disk\n", fs::quoted(fs::PathToString(path)), length);
fseek(filestr, 0, SEEK_SET);
file.seek(0, SEEK_SET);
uint8_t cur_byte;
for (int i = 0; i < length; ++i) {
file >> cur_byte;
Expand Down

0 comments on commit 9f1aa88

Please sign in to comment.