Skip to content

Commit

Permalink
Parametrize zsync block size for huge files
Browse files Browse the repository at this point in the history
andrii-suse committed Jun 4, 2020
1 parent e9c76fb commit 7c13aa1
Showing 3 changed files with 33 additions and 5 deletions.
22 changes: 22 additions & 0 deletions mb/mb/conf.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
boolean_opts = ['zsync_hashes', 'chunked_hashes']

DEFAULTS = {'zsync_hashes': False,
'zsync_block_size_for_1G': None,
'chunked_hashes': True,
'chunk_size': 262144,
'apache_documentroot': None}
@@ -74,6 +75,11 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
'cannot parse setting in [%s] section: %r' % (i, b + str(e)), conffile)
except configparser.NoOptionError as e:
pass
try:
self.general[i]['zsync_block_size_for_1G'] = adjust_zsync_block_size_for_1G(cp.getint(i, 'zsync_block_size_for_1G'))
except configparser.NoOptionError as e:
pass

# set default values where the config didn't define anything
for d in DEFAULTS:
try:
@@ -95,3 +101,19 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
# take care of the [mirrorprobe] section
#
self.mirrorprobe = dict(cp.items('mirrorprobe'))

def adjust_zsync_block_size_for_1G(n):
    """Validate the configured zsync block size used for files over 1 GiB.

    Returns *n* unchanged when it is a power of two >= 1024.  Values below
    1024 are rejected with a warning and the built-in default
    (DEFAULTS['zsync_block_size_for_1G']) is returned instead.  Any other
    value is rounded DOWN to the nearest power of two, with a warning.

    :param n: block size in bytes as read from the config file (int)
    :return: a power-of-two int block size, or the default for too-small n
    """
    if n < 1024:
        print("zsync_block_size_for_1G is too small, ignoring", file=sys.stderr)
        return DEFAULTS['zsync_block_size_for_1G']
    # A power of two has exactly one bit set, so n & (n - 1) == 0.
    # (n >= 1024 here, so no separate n != 0 guard is needed.)
    if n & (n - 1) == 0:
        return n

    # Round down to the nearest power of two.  int.bit_length() is exact for
    # arbitrarily large ints, unlike the previous repeated float division
    # (n /= 2), which could lose precision for very large n.
    n = 1 << (n.bit_length() - 1)

    # Warning goes to stderr, consistent with the too-small case above.
    # 512 was dropped from the example list: values below 1024 are rejected.
    print("zsync_block_size_for_1G must be power of 2 (1024, 2048, 4096, ...),"
          " adjusting down to: " + repr(n), file=sys.stderr)
    return n
12 changes: 8 additions & 4 deletions mb/mb/hashes.py
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ class Hasheable:

def __init__(self, basename, src_dir=None, dst_dir=None,
base_dir=None, do_zsync_hashes=False,
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False):
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False, zsync_block_size_for_1G=None):
self.basename = basename
if src_dir:
self.src_dir = src_dir
@@ -58,6 +58,7 @@ def __init__(self, basename, src_dir=None, dst_dir=None,
self.hb.do_chunked_hashes = do_chunked_hashes
self.hb.do_chunked_with_zsync = do_chunked_with_zsync
self.hb.chunk_size = chunk_size
self.hb.zsync_block_size_for_1G = zsync_block_size_for_1G

def islink(self):
return stat.S_ISLNK(self.mode)
@@ -151,7 +152,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):
file_id = c.fetchone()[0]
zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""INSERT INTO hash (file_id, mtime, size, md5,
sha1, sha256, sha1piecesize,
@@ -194,7 +195,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):

zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""UPDATE hash set mtime = %s, size = %s,
md5 = decode(%s, 'hex'),
@@ -367,7 +368,9 @@ def zs_guess_zsync_params(self):
import math

size = self.h.size
if size < 100000000:
if size > 1024*1024*1024 and self.zsync_block_size_for_1G is not None:
blocksize = self.zsync_block_size_for_1G
elif size < 100000000:
blocksize = 2048
else:
blocksize = 4096
@@ -423,6 +426,7 @@ def zs_get_block_sums(self, buf):
c = md4.digest()

if self.do_zsync_hashes:
import zsync
r = zsync.rsum06(block)

# save only some trailing bytes
4 changes: 3 additions & 1 deletion mb/scripts/mb
Original file line number Diff line number Diff line change
@@ -1177,7 +1177,9 @@ class MirrorDoctor(cmdln.Cmdln):
do_chunked_hashes=self.config.dbconfig.get(
'chunked_hashes'),
chunk_size=chunk_size,
do_chunked_with_zsync=do_chunked_with_zsync)
do_chunked_with_zsync=do_chunked_with_zsync,
zsync_block_size_for_1G=self.config.dbconfig.get(
'zsync_block_size_for_1G'))
except OSError as e:
if e.errno == errno.ENOENT:
sys.stderr.write('File vanished: %r\n' % src)

0 comments on commit 7c13aa1

Please sign in to comment.