diff --git a/mb/mb/conf.py b/mb/mb/conf.py
index d5b10e4a..ff819d1e 100644
--- a/mb/mb/conf.py
+++ b/mb/mb/conf.py
@@ -8,6 +8,7 @@ boolean_opts = ['zsync_hashes', 'chunked_hashes']
 
 
 DEFAULTS = {'zsync_hashes': False,
+            'zsync_block_size_for_1G': None,
             'chunked_hashes': True,
             'chunk_size': 262144,
             'apache_documentroot': None}
@@ -74,6 +75,11 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
                         'cannot parse setting in [%s] section: %r' % (i, b + str(e)), conffile)
                 except configparser.NoOptionError as e:
                     pass
+            try:
+                self.general[i]['zsync_block_size_for_1G'] = adjust_zsync_block_size_for_1G(cp.getint(i, 'zsync_block_size_for_1G'))
+            except configparser.NoOptionError:
+                pass
+
             # set default values where the config didn't define anything
             for d in DEFAULTS:
                 try:
@@ -95,3 +101,25 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
         # take care of the [mirrorprobe] section
         #
         self.mirrorprobe = dict(cp.items('mirrorprobe'))
+
+
+def adjust_zsync_block_size_for_1G(n):
+    """Normalize a configured zsync_block_size_for_1G value.
+
+    Values below 1024 are rejected: a warning goes to stderr and the
+    default from DEFAULTS is returned.  A power of two is returned
+    unchanged.  Anything else is rounded down to the nearest power of
+    two and a notice about the adjustment goes to stderr.
+    """
+    if n < 1024:
+        print("zsync_block_size_for_1G is too small, ignoring", file=sys.stderr)
+        return DEFAULTS['zsync_block_size_for_1G']
+    if n & (n - 1) == 0:
+        return n
+
+    # Largest power of two not exceeding n.  Pure integer arithmetic, so n
+    # never becomes a float the way repeated "n /= 2" would make it.
+    n = 1 << (n.bit_length() - 1)
+
+    print("zsync_block_size_for_1G must be power of 2 (1024, 2048, 4096, ...), adjusting down to: " + repr(n), file=sys.stderr)
+    return n
diff --git a/mb/mb/hashes.py b/mb/mb/hashes.py
index 2df4a925..405b78be 100644
--- a/mb/mb/hashes.py
+++ b/mb/mb/hashes.py
@@ -29,7 +29,7 @@ class Hasheable:
 
     def __init__(self, basename, src_dir=None,
                  base_dir=None, do_zsync_hashes=False,
-                 do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False, skip_metadata=False):
+                 do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False, skip_metadata=False, zsync_block_size_for_1G=None):
         self.basename = basename
         if src_dir:
             self.src_dir = src_dir
@@ -55,6 +55,7 @@ def __init__(self, basename, src_dir=None,
         self.hb.do_chunked_hashes = do_chunked_hashes
         self.hb.do_chunked_with_zsync = do_chunked_with_zsync
         self.hb.chunk_size = chunk_size
+        self.hb.zsync_block_size_for_1G = zsync_block_size_for_1G
 
     def islink(self):
         return stat.S_ISLNK(self.mode)
@@ -114,7 +115,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):
 
             zsums = ''
             for i in self.hb.zsums:
-                zsums = zsums + i.hexdigest()
+                zsums = zsums + i.hex()
 
             c.execute("""UPDATE files
                          SET mtime = to_timestamp(%s),
@@ -162,7 +163,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):
 
             zsums = ''
             for i in self.hb.zsums:
-                zsums = zsums + i.hexdigest()
+                zsums = zsums + i.hex()
             c.execute("""UPDATE files set mtime = to_timestamp(%s),
                          size = %s,
                          md5 = decode(%s, 'hex'),
@@ -333,7 +334,9 @@ def zs_guess_zsync_params(self):
         import math
 
         size = self.h.size
-        if size < 100000000:
+        if size > 1024*1024*1024 and self.zsync_block_size_for_1G is not None:
+            blocksize = self.zsync_block_size_for_1G
+        elif size < 100000000:
             blocksize = 2048
         else:
             blocksize = 4096
@@ -389,6 +392,7 @@ def zs_get_block_sums(self, buf):
             c = md4.digest()
 
             if self.do_zsync_hashes:
+                import zsync
                 r = zsync.rsum06(block)
 
                 # save only some trailing bytes
diff --git a/mb/scripts/mb b/mb/scripts/mb
index 93b5422e..02f39a2f 100755
--- a/mb/scripts/mb
+++ b/mb/scripts/mb
@@ -1158,7 +1158,9 @@ class MirrorDoctor(cmdln.Cmdln):
                                             'chunked_hashes'),
                                         chunk_size=chunk_size,
                                         do_chunked_with_zsync=do_chunked_with_zsync,
-                                        skip_metadata=skip_metadata)
+                                        skip_metadata=skip_metadata,
+                                        zsync_block_size_for_1G=self.config.dbconfig.get(
+                                            'zsync_block_size_for_1G'))
                 except OSError as e:
                     if e.errno == errno.ENOENT:
                         sys.stderr.write('File vanished: %r\n' % src)
diff --git a/mb/tests/conf_tests.py b/mb/tests/conf_tests.py
new file mode 100644
index 00000000..618b7774
--- /dev/null
+++ b/mb/tests/conf_tests.py
@@ -0,0 +1,25 @@
+import unittest
+
+from mb.conf import adjust_zsync_block_size_for_1G
+
+
+class TestConfig(unittest.TestCase):
+
+    def test_adjust_zsync_block_size_for_1G(self):
+        cases = {
+            0: None,
+            1023: None,
+            1024: 1024,
+            1025: 1024,
+            3*1024: 2*1024,
+            4*1024: 4*1024,
+            4*1024+1: 4*1024,
+            1024*1024*1024+1: 1024*1024*1024,
+        }
+        for n, expected in cases.items():
+            with self.subTest(n=n):
+                self.assertEqual(expected, adjust_zsync_block_size_for_1G(n))
+
+
+if __name__ == '__main__':
+    unittest.main()