fossilize_db: Reduce cache thrashing on reads
As a background service, we don't want to dominate the page cache, so
hint the kernel that we no longer need the data once it has been read.

Also hint the kernel in the database scanner loop that we're going to
read the database sequentially, then discard the scanned data from the
page cache.

This won't actually remove data from the cache; it only marks it to be
discarded first under cache pressure. This should be a no-op as long as
the system has enough RAM. Otherwise, it should stop the kernel from
swapping out anonymous memory or evicting warm cache entries owned by
other applications.

See-also: #84
See-also: #99
Signed-off-by: Kai Krakow <kai@kaishome.de>
kakra committed Oct 18, 2020
1 parent e612b5f commit 0556eb0
Showing 1 changed file with 34 additions and 1 deletion.
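
Condensed into a minimal standalone sketch, the pattern the diff applies looks like this (a sketch only, not the Fossilize code: scan_archive, the chunk size, and the read loop are illustrative, and posix_fadvise() return values are ignored because the call is purely advisory):

#include <cstdio>
#include <cstdint>
#include <vector>

#ifdef __linux__
#include <fcntl.h> // posix_fadvise() and the POSIX_FADV_* constants
#endif

// Sketch: scan a file front to back exactly once, telling the kernel to
// read ahead aggressively, and dropping our claim on each range as soon
// as we have consumed it.
static bool scan_archive(const char *path)
{
	FILE *file = fopen(path, "rb");
	if (!file)
		return false;

	fseek(file, 0, SEEK_END);
	long len = ftell(file);
	rewind(file);

#ifdef __linux__
	// One sequential pass over the whole file: prefetch ahead of us.
	posix_fadvise(fileno(file), 0, len, POSIX_FADV_SEQUENTIAL);
#endif

	std::vector<uint8_t> chunk(64 * 1024);
	long offset = 0;
	size_t got;
	while ((got = fread(chunk.data(), 1, chunk.size(), file)) != 0)
	{
		// ... parse the chunk ...

#ifdef __linux__
		// This range won't be touched again: make its pages the first
		// eviction candidates instead of another process's warm data.
		posix_fadvise(fileno(file), offset, (long)got, POSIX_FADV_DONTNEED);
#endif
		offset += (long)got;
	}

	fclose(file);
	return true;
}

Both calls are hints only; a kernel is free to ignore them, which is why their return values go unchecked here and in the diff below.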
fossilize_db.cpp
@@ -27,6 +27,10 @@
 #include <fcntl.h>
 #endif
 
+#ifdef __linux__
+#include <fcntl.h>
+#endif
+
 #include "fossilize_db.hpp"
 #include "path.hpp"
 #include "layer/utils.hpp"
@@ -755,6 +759,12 @@ struct StreamArchive : DatabaseInterface
 
 	if (len != 0)
 	{
+#ifdef __linux__
+		// Let Linux know we are going to read the file sequentially so
+		// it can prefetch data
+		posix_fadvise(fileno(file), 0, len, POSIX_FADV_SEQUENTIAL);
+#endif
+
 		uint8_t magic[MagicSize];
 		if (fread(magic, 1, MagicSize, file) != MagicSize)
 			return false;
@@ -786,10 +796,17 @@ struct StreamArchive : DatabaseInterface
 			// NAME + HEADER in one read
 			if (fread(bytes_to_read, 1, sizeof(bytes_to_read), file) != sizeof(bytes_to_read))
 				return false;
+
+#ifdef __linux__
+			// Hint the Linux page cache we no longer need this data in the cache
+			posix_fadvise(fileno(file), offset, sizeof(bytes_to_read), POSIX_FADV_DONTNEED);
+#endif
+
 			offset += sizeof(bytes_to_read);
 			header_raw = (PayloadHeaderRaw*)&bytes_to_read[FOSSILIZE_BLOB_HASH_LENGTH];
 			convert_from_le(header, *header_raw);
 
+
 			// Corrupt entry. Our process might have been killed before we could write all data.
 			if (offset + header.payload_size > len)
 			{
@@ -876,12 +893,18 @@ struct StreamArchive : DatabaseInterface
 	{
 		// Include the header.
 		ConditionalLockGuard holder(read_lock, (flags & PAYLOAD_READ_CONCURRENT_BIT) != 0);
-		if (fseek(file, itr->second.offset - sizeof(PayloadHeaderRaw), SEEK_SET) < 0)
+		size_t offset = itr->second.offset - sizeof(PayloadHeaderRaw);
+		if (fseek(file, offset, SEEK_SET) < 0)
 			return false;
 
 		size_t read_size = itr->second.header.payload_size + sizeof(PayloadHeaderRaw);
 		if (fread(blob, 1, read_size, file) != read_size)
 			return false;
+
+#ifdef __linux__
+		// Hint the Linux page cache we no longer need this data in the cache
+		posix_fadvise(fileno(file), offset, read_size, POSIX_FADV_DONTNEED);
+#endif
 	}
 	else
 	{
@@ -1076,6 +1099,11 @@ struct StreamArchive : DatabaseInterface
 		size_t read_size = entry.header.payload_size;
 		if (fread(blob, 1, read_size, file) != read_size)
 			return false;
+
+#ifdef __linux__
+		// Hint the Linux page cache we no longer need this data in the cache
+		posix_fadvise(fileno(file), entry.offset, read_size, POSIX_FADV_DONTNEED);
+#endif
 	}
 
 	if (entry.header.crc != 0) // Verify checksum.
@@ -1133,6 +1161,11 @@ struct StreamArchive : DatabaseInterface
 			return false;
 		if (fread(dst_zlib_buffer, 1, entry.header.payload_size, file) != entry.header.payload_size)
 			return false;
+
+#ifdef __linux__
+		// Hint the Linux page cache we no longer need this data in the cache
+		posix_fadvise(fileno(file), entry.offset, entry.header.payload_size, POSIX_FADV_DONTNEED);
+#endif
 	}
 
 	if (entry.header.crc != 0) // Verify checksum.
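Whether the hints actually change what stays cached can be observed with mincore(2). A hypothetical standalone checker, not part of this commit (the file name, output format, and the helper itself are made up for illustration):

// resident_pages.cpp - report how many pages of a file are currently
// resident in the page cache, via mmap() + mincore(). Mapping the file
// does not fault pages in, so this only samples existing cache state.
#include <cstdio>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc != 2)
		return 1;

	int fd = open(argv[1], O_RDONLY);
	struct stat st;
	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
		return 1;

	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	size_t pages = ((size_t)st.st_size + page - 1) / page;

	void *map = mmap(nullptr, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	std::vector<unsigned char> vec(pages);
	if (mincore(map, (size_t)st.st_size, vec.data()) < 0)
		return 1;

	size_t resident = 0;
	for (unsigned char v : vec)
		resident += v & 1; // low bit set = page resident

	printf("%zu of %zu pages resident\n", resident, pages);
	munmap(map, (size_t)st.st_size);
	close(fd);
	return 0;
}

Running it on an archive before and after a scan shows how much of the file the scan left in the cache.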
