Skip to content

Commit

Permalink
Parametrize zsync block size for huge files
Browse files Browse the repository at this point in the history
andrii-suse committed Jun 4, 2020
1 parent e9c76fb commit 7c13aa1
Showing 3 changed files with 33 additions and 5 deletions.
22 changes: 22 additions & 0 deletions mb/mb/conf.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
boolean_opts = ['zsync_hashes', 'chunked_hashes']

DEFAULTS = {'zsync_hashes': False,
'zsync_block_size_for_1G': None,
'chunked_hashes': True,
'chunk_size': 262144,
'apache_documentroot': None}
@@ -74,6 +75,11 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
'cannot parse setting in [%s] section: %r' % (i, b + str(e)), conffile)
except configparser.NoOptionError as e:
pass
try:
self.general[i]['zsync_block_size_for_1G'] = adjust_zsync_block_size_for_1G(cp.getint(i, 'zsync_block_size_for_1G'))
except configparser.NoOptionError as e:
pass

# set default values where the config didn't define anything
for d in DEFAULTS:
try:
@@ -95,3 +101,19 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
# take care of the [mirrorprobe] section
#
self.mirrorprobe = dict(cp.items('mirrorprobe'))

def adjust_zsync_block_size_for_1G(n):
    """Validate the configured zsync block size used for files over 1 GiB.

    Returns *n* unchanged when it is a power of two >= 1024.  Values below
    1024 are rejected with a warning and the built-in default
    (DEFAULTS['zsync_block_size_for_1G']) is returned instead.  Any other
    value is rounded DOWN to the nearest power of two, with a warning.

    :param n: block size in bytes as read from the config file (int)
    :return: a power-of-two int block size, or the default for too-small n
    """
    if n < 1024:
        print("zsync_block_size_for_1G is too small, ignoring", file=sys.stderr)
        return DEFAULTS['zsync_block_size_for_1G']
    # A power of two has exactly one bit set, so n & (n - 1) == 0.
    # (n >= 1024 here, so no separate n != 0 guard is needed.)
    if n & (n - 1) == 0:
        return n

    # Round down to the nearest power of two.  int.bit_length() is exact for
    # arbitrarily large ints, unlike the previous repeated float division
    # (n /= 2), which could lose precision for very large n.
    n = 1 << (n.bit_length() - 1)

    # Warning goes to stderr, consistent with the too-small case above.
    # 512 was dropped from the example list: values below 1024 are rejected.
    print("zsync_block_size_for_1G must be power of 2 (1024, 2048, 4096, ...),"
          " adjusting down to: " + repr(n), file=sys.stderr)
    return n
12 changes: 8 additions & 4 deletions mb/mb/hashes.py
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ class Hasheable:

def __init__(self, basename, src_dir=None, dst_dir=None,
base_dir=None, do_zsync_hashes=False,
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False):
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False, zsync_block_size_for_1G=None):
self.basename = basename
if src_dir:
self.src_dir = src_dir
@@ -58,6 +58,7 @@ def __init__(self, basename, src_dir=None, dst_dir=None,
self.hb.do_chunked_hashes = do_chunked_hashes
self.hb.do_chunked_with_zsync = do_chunked_with_zsync
self.hb.chunk_size = chunk_size
self.hb.zsync_block_size_for_1G = zsync_block_size_for_1G

def islink(self):
return stat.S_ISLNK(self.mode)
@@ -151,7 +152,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):
file_id = c.fetchone()[0]
zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""INSERT INTO hash (file_id, mtime, size, md5,
sha1, sha256, sha1piecesize,
@@ -194,7 +195,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):

zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""UPDATE hash set mtime = %s, size = %s,
md5 = decode(%s, 'hex'),
@@ -367,7 +368,9 @@ def zs_guess_zsync_params(self):
import math

size = self.h.size
if size < 100000000:
if size > 1024*1024*1024 and self.zsync_block_size_for_1G is not None:
blocksize = self.zsync_block_size_for_1G
elif size < 100000000:
blocksize = 2048
else:
blocksize = 4096
@@ -423,6 +426,7 @@ def zs_get_block_sums(self, buf):
c = md4.digest()

if self.do_zsync_hashes:
import zsync
r = zsync.rsum06(block)

# save only some trailing bytes
4 changes: 3 additions & 1 deletion mb/scripts/mb
Original file line number Diff line number Diff line change
@@ -1177,7 +1177,9 @@ class MirrorDoctor(cmdln.Cmdln):
do_chunked_hashes=self.config.dbconfig.get(
'chunked_hashes'),
chunk_size=chunk_size,
do_chunked_with_zsync=do_chunked_with_zsync)
do_chunked_with_zsync=do_chunked_with_zsync,
zsync_block_size_for_1G=self.config.dbconfig.get(
'zsync_block_size_for_1G'))
except OSError as e:
if e.errno == errno.ENOENT:
sys.stderr.write('File vanished: %r\n' % src)

0 comments on commit 7c13aa1

Please sign in to comment.