Skip to content

Commit

Permalink
Parametrize zsync block size for huge files
Browse files Browse the repository at this point in the history
  • Loading branch information
andrii-suse committed Jun 4, 2020
1 parent e9c76fb commit 1b03d99
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 5 deletions.
6 changes: 6 additions & 0 deletions mb/mb/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# Names of the settings that hold true/false values.
# NOTE(review): presumably these are read with a boolean getter when the
# config file is parsed -- confirm against the parsing code.
boolean_opts = ['zsync_hashes', 'chunked_hashes']

# Values used for any setting that the configuration file did not define.
DEFAULTS = {'zsync_hashes': False,
# Optional integer zsync block size for files larger than 1 GiB
# (1024*1024*1024 bytes). None keeps the size-based choice of
# 2048 / 4096 made by the hashing code.
'zsync_block_size_for_1G': None,
'chunked_hashes': True,
# 262144 == 256 * 1024 (unit presumably bytes -- confirm)
'chunk_size': 262144,
'apache_documentroot': None}
Expand Down Expand Up @@ -74,6 +75,11 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
'cannot parse setting in [%s] section: %r' % (i, b + str(e)), conffile)
except configparser.NoOptionError as e:
pass
try:
self.general[i]['zsync_block_size_for_1G'] = cp.getint(i, 'zsync_block_size_for_1G')
except configparser.NoOptionError as e:
pass

# set default values where the config didn't define anything
for d in DEFAULTS:
try:
Expand Down
12 changes: 8 additions & 4 deletions mb/mb/hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Hasheable:

def __init__(self, basename, src_dir=None, dst_dir=None,
base_dir=None, do_zsync_hashes=False,
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False):
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False, zsync_block_size_for_1G=None):
self.basename = basename
if src_dir:
self.src_dir = src_dir
Expand Down Expand Up @@ -58,6 +58,7 @@ def __init__(self, basename, src_dir=None, dst_dir=None,
self.hb.do_chunked_hashes = do_chunked_hashes
self.hb.do_chunked_with_zsync = do_chunked_with_zsync
self.hb.chunk_size = chunk_size
self.hb.zsync_block_size_for_1G = zsync_block_size_for_1G

def islink(self):
return stat.S_ISLNK(self.mode)
Expand Down Expand Up @@ -151,7 +152,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):
file_id = c.fetchone()[0]
zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""INSERT INTO hash (file_id, mtime, size, md5,
sha1, sha256, sha1piecesize,
Expand Down Expand Up @@ -194,7 +195,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):

zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""UPDATE hash set mtime = %s, size = %s,
md5 = decode(%s, 'hex'),
Expand Down Expand Up @@ -367,7 +368,9 @@ def zs_guess_zsync_params(self):
import math

size = self.h.size
if size < 100000000:
if size > 1024*1024*1024 and self.zsync_block_size_for_1G is not None:
blocksize = self.zsync_block_size_for_1G
elif size < 100000000:
blocksize = 2048
else:
blocksize = 4096
Expand Down Expand Up @@ -423,6 +426,7 @@ def zs_get_block_sums(self, buf):
c = md4.digest()

if self.do_zsync_hashes:
import zsync
r = zsync.rsum06(block)

# save only some trailing bytes
Expand Down
4 changes: 3 additions & 1 deletion mb/scripts/mb
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,9 @@ class MirrorDoctor(cmdln.Cmdln):
do_chunked_hashes=self.config.dbconfig.get(
'chunked_hashes'),
chunk_size=chunk_size,
do_chunked_with_zsync=do_chunked_with_zsync)
do_chunked_with_zsync=do_chunked_with_zsync,
zsync_block_size_for_1G=self.config.dbconfig.get(
'zsync_block_size_for_1G'))
except OSError as e:
if e.errno == errno.ENOENT:
sys.stderr.write('File vanished: %r\n' % src)
Expand Down

0 comments on commit 1b03d99

Please sign in to comment.