Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parametrize zsync block size for huge files #47

Merged
merged 2 commits into from
Jul 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions mb/mb/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
boolean_opts = ['zsync_hashes', 'chunked_hashes']

# Fallback values for per-instance settings; they are applied for any key the
# config file did not define.
# 'zsync_block_size_for_1G' defaults to None, which leaves the override off:
# the hashing code only uses it when it is not None and the file is larger
# than 1024*1024*1024 bytes.
DEFAULTS = {'zsync_hashes': False,
'zsync_block_size_for_1G': None,
'chunked_hashes': True,
'chunk_size': 262144,
'apache_documentroot': None}
Expand Down Expand Up @@ -74,6 +75,11 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
'cannot parse setting in [%s] section: %r' % (i, b + str(e)), conffile)
except configparser.NoOptionError as e:
pass
try:
self.general[i]['zsync_block_size_for_1G'] = adjust_zsync_block_size_for_1G(cp.getint(i, 'zsync_block_size_for_1G'))
except configparser.NoOptionError as e:
pass

# set default values where the config didn't define anything
for d in DEFAULTS:
try:
Expand All @@ -95,3 +101,19 @@ def __init__(self, conffile='/etc/mirrorbrain.conf', instance=None):
# take care of the [mirrorprobe] section
#
self.mirrorprobe = dict(cp.items('mirrorprobe'))

def adjust_zsync_block_size_for_1G(n):
    """Validate and normalize the configured zsync block size.

    Args:
        n: Integer value of the ``zsync_block_size_for_1G`` setting.

    Returns:
        * ``DEFAULTS['zsync_block_size_for_1G']`` when ``n`` is below 1024
          (the setting is ignored and a note is written to stderr);
        * ``n`` itself when it is already a power of two;
        * otherwise the largest power of two that is not greater than ``n``
          (a note about the adjustment is printed).
    """
    if n < 1024:
        print("zsync_block_size_for_1G is too small, ignoring", file=sys.stderr)
        return DEFAULTS['zsync_block_size_for_1G']

    # A power of two has exactly one bit set, so clearing the lowest set bit
    # yields zero. n >= 1024 here, so no separate zero check is needed.
    if n & (n - 1) == 0:
        return n

    # Round down using integer arithmetic only. Repeated true division
    # (n /= 2) converts n to a float, and for large values the rounding can
    # push the result *above* the input (e.g. 2**54 - 1 became 2**54).
    n = 1 << (n.bit_length() - 1)

    # NOTE(review): this message goes to stdout while the "too small" message
    # goes to stderr; kept as-is to avoid changing observable output.
    print("zsync_block_size_for_1G must be power of 2 (512, 1024, 2048, ...), adjusting down to: " + repr(n))
    return n
12 changes: 8 additions & 4 deletions mb/mb/hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Hasheable:

def __init__(self, basename, src_dir=None,
base_dir=None, do_zsync_hashes=False,
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False, skip_metadata=False):
do_chunked_hashes=True, chunk_size=DEFAULT_PIECESIZE, do_chunked_with_zsync=False, skip_metadata=False, zsync_block_size_for_1G=None):
self.basename = basename
if src_dir:
self.src_dir = src_dir
Expand All @@ -55,6 +55,7 @@ def __init__(self, basename, src_dir=None,
self.hb.do_chunked_hashes = do_chunked_hashes
self.hb.do_chunked_with_zsync = do_chunked_with_zsync
self.hb.chunk_size = chunk_size
self.hb.zsync_block_size_for_1G = zsync_block_size_for_1G

def islink(self):
    """Return True if the mode bits in ``self.mode`` denote a symbolic link."""
    return stat.S_ISLNK(self.mode)
Expand Down Expand Up @@ -114,7 +115,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):

zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""UPDATE files SET
mtime = to_timestamp(%s),
Expand Down Expand Up @@ -162,7 +163,7 @@ def check_db(self, conn, verbose=False, dry_run=False, force=False):

zsums = ''
for i in self.hb.zsums:
zsums = zsums + i.hexdigest()
zsums = zsums + i.hex()

c.execute("""UPDATE files set mtime = to_timestamp(%s), size = %s,
md5 = decode(%s, 'hex'),
Expand Down Expand Up @@ -333,7 +334,9 @@ def zs_guess_zsync_params(self):
import math

size = self.h.size
if size < 100000000:
if size > 1024*1024*1024 and self.zsync_block_size_for_1G is not None:
blocksize = self.zsync_block_size_for_1G
elif size < 100000000:
blocksize = 2048
else:
blocksize = 4096
Expand Down Expand Up @@ -389,6 +392,7 @@ def zs_get_block_sums(self, buf):
c = md4.digest()

if self.do_zsync_hashes:
import zsync
r = zsync.rsum06(block)

# save only some trailing bytes
Expand Down
4 changes: 3 additions & 1 deletion mb/scripts/mb
Original file line number Diff line number Diff line change
Expand Up @@ -1158,7 +1158,9 @@ class MirrorDoctor(cmdln.Cmdln):
'chunked_hashes'),
chunk_size=chunk_size,
do_chunked_with_zsync=do_chunked_with_zsync,
skip_metadata=skip_metadata)
skip_metadata=skip_metadata,
zsync_block_size_for_1G=self.config.dbconfig.get(
'zsync_block_size_for_1G'))
except OSError as e:
if e.errno == errno.ENOENT:
sys.stderr.write('File vanished: %r\n' % src)
Expand Down
22 changes: 22 additions & 0 deletions mb/tests/conf_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest

from mb.conf import adjust_zsync_block_size_for_1G


class TestConfig(unittest.TestCase):

def test_adjust_zsync_block_size_for_1G(self):
cases = {
0: None,
1023: None,
1025: 1024,
3*1024: 2*1024,
4*1024: 4*1024,
4*1024+1: 4*1024,
1024*1024*1024+1: 1024*1024*1024
}
for n in cases:
self.assertEqual(cases[n], adjust_zsync_block_size_for_1G(n), "for input " + repr(n))

if __name__ == '__main__':
unittest.main()