fossilize_db: Reduce cache thrashing on reads
As a background service, we don't want to dominate the page cache, so
hint the kernel that we no longer need the data once it has been read.

Also hint the kernel in the database scanner loop that we're going to
read the database sequentially, then discard the scanned data from the
page cache.

This won't actually remove data from the cache; it only marks it to be
discarded first under cache pressure. This should be a no-op as long as
the system has enough RAM. Otherwise, it should stop the kernel from
swapping out anonymous memory or evicting warm cache entries owned by
other applications.

See-also: #84
See-also: #99
Signed-off-by: Kai Krakow <kai@kaishome.de>
kakra committed Oct 18, 2020
1 parent e612b5f commit 0556eb0
Showing 1 changed file with 34 additions and 1 deletion.
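
Condensed into a minimal standalone sketch, the pattern the diff applies looks like this (a sketch only, not the Fossilize code: scan_archive, the chunk size, and the read loop are illustrative, and posix_fadvise() return values are ignored because the call is purely advisory):

#include <cstdio>
#include <cstdint>
#include <vector>

#ifdef __linux__
#include <fcntl.h> // posix_fadvise() and the POSIX_FADV_* constants
#endif

// Sketch: scan a file front to back exactly once, telling the kernel to
// read ahead aggressively, and dropping our claim on each range as soon
// as we have consumed it.
static bool scan_archive(const char *path)
{
	FILE *file = fopen(path, "rb");
	if (!file)
		return false;

	fseek(file, 0, SEEK_END);
	long len = ftell(file);
	rewind(file);

#ifdef __linux__
	// One sequential pass over the whole file: prefetch ahead of us.
	posix_fadvise(fileno(file), 0, len, POSIX_FADV_SEQUENTIAL);
#endif

	std::vector<uint8_t> chunk(64 * 1024);
	long offset = 0;
	size_t got;
	while ((got = fread(chunk.data(), 1, chunk.size(), file)) != 0)
	{
		// ... parse the chunk ...

#ifdef __linux__
		// This range won't be touched again: make its pages the first
		// eviction candidates instead of another process's warm data.
		posix_fadvise(fileno(file), offset, (long)got, POSIX_FADV_DONTNEED);
#endif
		offset += (long)got;
	}

	fclose(file);
	return true;
}

Both calls are hints only; a kernel is free to ignore them, which is why their return values go unchecked here and in the diff below.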
fossilize_db.cpp
@@ -27,6 +27,10 @@
 #include <fcntl.h>
 #endif
 
+#ifdef __linux__
+#include <fcntl.h>
+#endif
+
 #include "fossilize_db.hpp"
 #include "path.hpp"
 #include "layer/utils.hpp"
@@ -755,6 +759,12 @@ struct StreamArchive : DatabaseInterface
 
 	if (len != 0)
 	{
+#ifdef __linux__
+		// Let Linux know we are going to read the file sequentially so
+		// it can prefetch data
+		posix_fadvise(fileno(file), 0, len, POSIX_FADV_SEQUENTIAL);
+#endif
+
 		uint8_t magic[MagicSize];
 		if (fread(magic, 1, MagicSize, file) != MagicSize)
 			return false;
@@ -786,10 +796,17 @@ struct StreamArchive : DatabaseInterface
 			// NAME + HEADER in one read
 			if (fread(bytes_to_read, 1, sizeof(bytes_to_read), file) != sizeof(bytes_to_read))
 				return false;
+
+#ifdef __linux__
+			// Hint the Linux page cache we no longer need this data in the cache
+			posix_fadvise(fileno(file), offset, sizeof(bytes_to_read), POSIX_FADV_DONTNEED);
+#endif
+
 			offset += sizeof(bytes_to_read);
 			header_raw = (PayloadHeaderRaw*)&bytes_to_read[FOSSILIZE_BLOB_HASH_LENGTH];
 			convert_from_le(header, *header_raw);
 
+
 			// Corrupt entry. Our process might have been killed before we could write all data.
 			if (offset + header.payload_size > len)
 			{
@@ -876,12 +893,18 @@ struct StreamArchive : DatabaseInterface
 	{
 		// Include the header.
 		ConditionalLockGuard holder(read_lock, (flags & PAYLOAD_READ_CONCURRENT_BIT) != 0);
-		if (fseek(file, itr->second.offset - sizeof(PayloadHeaderRaw), SEEK_SET) < 0)
+		size_t offset = itr->second.offset - sizeof(PayloadHeaderRaw);
+		if (fseek(file, offset, SEEK_SET) < 0)
 			return false;
 
 		size_t read_size = itr->second.header.payload_size + sizeof(PayloadHeaderRaw);
 		if (fread(blob, 1, read_size, file) != read_size)
 			return false;
+
+#ifdef __linux__
+		// Hint the Linux page cache we no longer need this data in the cache
+		posix_fadvise(fileno(file), offset, read_size, POSIX_FADV_DONTNEED);
+#endif
 	}
 	else
 	{
@@ -1076,6 +1099,11 @@ struct StreamArchive : DatabaseInterface
 		size_t read_size = entry.header.payload_size;
 		if (fread(blob, 1, read_size, file) != read_size)
 			return false;
+
+#ifdef __linux__
+		// Hint the Linux page cache we no longer need this data in the cache
+		posix_fadvise(fileno(file), entry.offset, read_size, POSIX_FADV_DONTNEED);
+#endif
 	}
 
 	if (entry.header.crc != 0) // Verify checksum.
@@ -1133,6 +1161,11 @@ struct StreamArchive : DatabaseInterface
 			return false;
 		if (fread(dst_zlib_buffer, 1, entry.header.payload_size, file) != entry.header.payload_size)
 			return false;
+
+#ifdef __linux__
+		// Hint the Linux page cache we no longer need this data in the cache
+		posix_fadvise(fileno(file), entry.offset, entry.header.payload_size, POSIX_FADV_DONTNEED);
+#endif
 	}
 
 	if (entry.header.crc != 0) // Verify checksum.
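Whether the hints actually change what stays cached can be observed with mincore(2). A hypothetical standalone checker, not part of this commit (the file name, output format, and the helper itself are made up for illustration):

// resident_pages.cpp - report how many pages of a file are currently
// resident in the page cache, via mmap() + mincore(). Mapping the file
// does not fault pages in, so this only samples existing cache state.
#include <cstdio>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc != 2)
		return 1;

	int fd = open(argv[1], O_RDONLY);
	struct stat st;
	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
		return 1;

	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	size_t pages = ((size_t)st.st_size + page - 1) / page;

	void *map = mmap(nullptr, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	std::vector<unsigned char> vec(pages);
	if (mincore(map, (size_t)st.st_size, vec.data()) < 0)
		return 1;

	size_t resident = 0;
	for (unsigned char v : vec)
		resident += v & 1; // low bit set = page resident

	printf("%zu of %zu pages resident\n", resident, pages);
	munmap(map, (size_t)st.st_size);
	close(fd);
	return 0;
}

Running it on an archive before and after a scan shows how much of the file the scan left in the cache.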
