From 0d6dfd87500e11933f4a47e3c242cc22360f47cf Mon Sep 17 00:00:00 2001 From: Leandro Motta Barros Date: Thu, 29 Jul 2021 18:40:59 -0300 Subject: [PATCH] Use optimal block length to generate deltas Previously, we used a block length hardcoded to 512 bytes. Our measurements have shown that this value was generally inadequate: it produced relatively large deltas and took a relatively long time to do so. librsync, by default, uses a block length equal to the square root of the size of the old (basis) file. This value results in significantly smaller deltas and shorter run times. In this commit, we do one more optimization and round this value up to the next power of two. Since librsync-go has a code path optimized for buffers with sizes that are powers of two, this gives us another performance gain. --- daemon/images/image_delta.go | 26 +++++++++++++++++++- daemon/images/image_delta_test.go | 41 +++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 daemon/images/image_delta_test.go diff --git a/daemon/images/image_delta.go b/daemon/images/image_delta.go index 05c4051f76..ef75d57b58 100644 --- a/daemon/images/image_delta.go +++ b/daemon/images/image_delta.go @@ -6,6 +6,7 @@ import ( "encoding/json" "io" "io/ioutil" + "math" "os" "time" @@ -64,8 +65,13 @@ func (i *ImageService) DeltaCreate(deltaSrc, deltaDest string, options types.Ima progressReader := progress.NewProgressReader(srcData, progressOutput, srcDataLen, deltaSrc, "Fingerprinting") defer progressReader.Close() + // librsync recommends setting block length to the square root of the old + // file size. We follow this advice but we round the value to a power of two + // because librsync-go is optimized for power of two block lengths. This + // gives us small delta sizes and fast execution times. 
+ blockLen := roundToPowerOf2(uint32(math.Sqrt(float64(srcDataLen)))) sigStart := time.Now() - srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC) + srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, blockLen, 32, librsync.BLAKE2_SIG_MAGIC) if err != nil { return err } @@ -285,3 +291,21 @@ func (lock *imglock) unlock(ls layer.Store) { layer.ReleaseAndLog(ls, l) } } + +// roundToPowerOf2 rounds x up to the next power of 2 (powers of 2 are returned +// unchanged; 0 yields 0). If x is greater than 2^31, returns 2^31 (=2147483648), +// which is technically incorrect but is the largest power of two a uint32 can hold. +// +// The algorithm is adapted from Hacker's Delight, 2nd Edition, p.62. +func roundToPowerOf2(x uint32) uint32 { + if x >= 2147483648 { + return 2147483648 + } + x -= 1 + x |= x >> 1 + x |= x >> 2 + x |= x >> 4 + x |= x >> 8 + x |= x >> 16 + return x + 1 +} diff --git a/daemon/images/image_delta_test.go b/daemon/images/image_delta_test.go new file mode 100644 index 0000000000..1f93405151 --- /dev/null +++ b/daemon/images/image_delta_test.go @@ -0,0 +1,41 @@ +package images + +import ( + "fmt" + "math" + "testing" +) + +func Test_roundToPowerOf2(t *testing.T) { + tests := []struct { + x uint32 + want uint32 + }{ + {0, 0}, + {1, 1}, + {2, 2}, + {3, 4}, + {4, 4}, + {11, 16}, + {127, 128}, + {128, 128}, + {129, 256}, + {1234, 2048}, + {15000, 16384}, + {30001, 32768}, + {44444, 65536}, + {4190000, 4194304}, + {1073711111, 1073741824}, + {1073741824, 1073741824}, + {uint32(math.Pow(2, 31)), uint32(math.Pow(2, 31))}, + {2147483649, 2147483648}, + {4294967295, 2147483648}, + } + for _, tt := range tests { + t.Run(fmt.Sprintf("roundToPowerOf2(%v)", tt.x), func(t *testing.T) { + if got := roundToPowerOf2(tt.x); got != tt.want { + t.Errorf("got roundToPowerOf2(%v) = %v, want %v", tt.x, got, tt.want) + } + }) + } +}