-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Switch Content Hashing from CRC32 to XXHash64 (#198)
- CRC32 collision probability is too high for comfort since it's only 32 bits - Switching to XXHash64 since it's a high-quality hash, 64 bits, and extremely fast (faster than CRC32 for medium-size inputs: https://lz4.github.io/lz4-java/1.3.0/xxhash-benchmark/)
- Loading branch information
Showing
8 changed files
with
89 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package sjsonnet | ||
|
||
import java.nio.file.{Files, Path => JavaPath} | ||
|
||
import scala.util.Random | ||
|
||
import net.jpountz.xxhash.{StreamingXXHash64, XXHashFactory, XXHash64} | ||
|
||
import utest._ | ||
import TestUtils.eval | ||
|
||
/**
 * Checks that the content hash reported by `CachedResolvedFile` equals the
 * XXHash64 (seed 0) of the same bytes computed in one shot over the whole array.
 */
object XxHash64Tests extends TestSuite {
  val tests = Tests {

    test("xxhash") {
      // Cover a range of file sizes from 1 KiB up to 1 MiB.
      for (sizeInKb <- List(1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024)) {
        val (randomContent, tempFilePath) = generateRandomContentAndSaveToFile(sizeInKb)
        try {
          val xxHash64 = XXHashFactory.fastestInstance().hash64()
          // Reference value: the non-streaming xxHash64 over the whole byte array, seed 0.
          val xxHash64Result = xxHash64.hash(randomContent, 0, randomContent.length, 0).toString
          // Value under test: the hash CachedResolvedFile computes for the file on disk.
          // cacheThresholdBytes = 0 is presumably what forces the file-based (chunked/streaming)
          // hashing path -- NOTE(review): confirm against CachedResolvedFile.
          val cachedFile = new CachedResolvedFile(
            OsPath(os.Path(tempFilePath)),
            memoryLimitBytes = Int.MaxValue,
            cacheThresholdBytes = 0)
          // They should agree
          val hash = cachedFile.contentHash
          assert(xxHash64Result == hash)
        } finally {
          // Always remove the temp file, even when the assertion fails, so repeated
          // test runs do not leave files behind in the temp directory.
          Files.deleteIfExists(tempFilePath)
        }
      }
    }
  }

  /**
   * Fills a byte array of `sizeInKb` * 1024 bytes with random data and writes it
   * to a freshly created temporary file.
   *
   * The caller owns the returned file and is responsible for deleting it.
   *
   * @param sizeInKb size of the generated content, in units of 1024 bytes
   * @return the generated bytes together with the path of the temp file holding them
   */
  private def generateRandomContentAndSaveToFile(sizeInKb: Int): (Array[Byte], JavaPath) = {
    val random = new Random()
    val byteArraySize = 1024 * sizeInKb
    val randomContent = new Array[Byte](byteArraySize)
    random.nextBytes(randomContent)

    val tempFilePath = Files.createTempFile("randomContent", ".tmp")
    Files.write(tempFilePath, randomContent)

    (randomContent, tempFilePath)
  }
}
|