Skip to content

Commit

Permalink
Merge pull request #3548 from gum3ng/issue_3490
Browse files Browse the repository at this point in the history
Support for specifying amount of data in read-data-subset
  • Loading branch information
MichaelEischer authored Nov 5, 2021
2 parents 78c7dd5 + 836fbb9 commit 6c84ea1
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 8 deletions.
9 changes: 9 additions & 0 deletions changelog/unreleased/issue-3490
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Enhancement: Support for specifying file size in `check --read-data-subset`

To check a subset of repository files, the `check --read-data-subset` command
used to support two ways to select a subset: a specific range of pack files,
or a random percentage of pack files. We have added a third method to select pack
files: by specifying a file size. This new option is available with the `restic check` command.

https://github.com/restic/restic/issues/3490
https://github.com/restic/restic/pull/3548
43 changes: 38 additions & 5 deletions cmd/restic/cmd_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func checkFlags(opts CheckOptions) error {
}
if opts.ReadDataSubset != "" {
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%%")
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage or a file size, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%% or --read-data-subset=10G")
if err == nil {
if len(dataSubset) != 2 {
return argumentError
Expand All @@ -76,7 +76,7 @@ func checkFlags(opts CheckOptions) error {
if dataSubset[1] > totalBucketsMax {
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
}
} else {
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err != nil {
return argumentError
Expand All @@ -86,6 +86,17 @@ func checkFlags(opts CheckOptions) error {
return errors.Fatal(
"check flag --read-data-subset=n% n must be above 0.0% and at most 100.0%")
}

} else {
fileSize, err := parseSizeStr(opts.ReadDataSubset)
if err != nil {
return argumentError
}
if fileSize <= 0.0 {
return errors.Fatal(
"check flag --read-data-subset=n n must be above 0.0")
}

}
}

Expand Down Expand Up @@ -294,10 +305,27 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
packCount := uint64(len(packs))
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err == nil {
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
Verbosef("read %.1f%% of data packs\n", percentage)
}
} else {
percentage, _ := parsePercentage(opts.ReadDataSubset)
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
Verbosef("read %.1f%% of data packs\n", percentage)
repoSize := int64(0)
allPacks := chkr.GetPacks()
for _, size := range allPacks {
repoSize += size
}
if repoSize == 0 {
return errors.Fatal("Cannot read from a repository having size 0")
}
subsetSize, _ := parseSizeStr(opts.ReadDataSubset)
if subsetSize > repoSize {
subsetSize = repoSize
}
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
Verbosef("read %d bytes of data packs\n", subsetSize)
}
if packs == nil {
return errors.Fatal("internal error: failed to select packs to check")
Expand Down Expand Up @@ -349,6 +377,11 @@ func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage floa
id := keys[idx[i]]
packs[id] = allPacks[id]
}
return packs
}

// selectRandomPacksByFileSize picks a random subset of allPacks whose share of
// the repository corresponds to subsetSize bytes out of repoSize bytes.
//
// The requested size is expressed as a percentage of repoSize and the actual
// selection is delegated to selectRandomPacksByPercentage. repoSize is used as
// a floating-point divisor, so callers are expected to pass a non-zero value.
func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 {
	// fraction of the repository that was requested, e.g. 0.2 for 20%
	fraction := float64(subsetSize) / float64(repoSize)
	return selectRandomPacksByPercentage(allPacks, fraction*100.0)
}
34 changes: 34 additions & 0 deletions cmd/restic/cmd_check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,37 @@ func TestSelectNoRandomPacksByPercentage(t *testing.T) {
selectedPacks := selectRandomPacksByPercentage(testPacks, 10.0)
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
}

// TestSelectRandomPacksByFileSize checks how many packs
// selectRandomPacksByFileSize returns for several subset/repository size
// combinations and, where requested, that every returned pack belongs to the
// input set.
func TestSelectRandomPacksByFileSize(t *testing.T) {
	packSet := make(map[restic.ID]int64)
	for n := 1; n <= 10; n++ {
		packID := restic.NewRandomID()
		// overwrite the first byte so that all ten ids are distinct
		packID[0] = byte(n)
		packSet[packID] = 0
	}

	cases := []struct {
		subsetSize int64
		repoSize   int64
		wantCount  int
		countMsg   string
		// memberMsg is the failure message for the membership check;
		// an empty value means the membership check is skipped.
		memberMsg string
	}{
		{subsetSize: 10, repoSize: 500, wantCount: 1, countMsg: "Expected 1 selected packs"},
		{subsetSize: 10240, repoSize: 51200, wantCount: 2, countMsg: "Expected 2 selected packs", memberMsg: "Unexpected selection"},
		{subsetSize: 500, repoSize: 500, wantCount: 10, countMsg: "Expected 10 selected packs", memberMsg: "Unexpected item in selection"},
	}

	for _, tc := range cases {
		chosen := selectRandomPacksByFileSize(packSet, tc.subsetSize, tc.repoSize)
		rtest.Assert(t, len(chosen) == tc.wantCount, tc.countMsg)
		if tc.memberMsg == "" {
			continue
		}
		for packID := range chosen {
			_, known := packSet[packID]
			rtest.Assert(t, known, tc.memberMsg)
		}
	}
}

// TestSelectNoRandomPacksByFileSize verifies that selecting by file size from
// a repository without any pack files works and yields an empty selection.
func TestSelectNoRandomPacksByFileSize(t *testing.T) {
	noPacks := map[restic.ID]int64{}
	chosen := selectRandomPacksByFileSize(noPacks, 10, 500)
	rtest.Assert(t, len(chosen) == 0, "Expected 0 selected packs")
}
19 changes: 16 additions & 3 deletions doc/045_working_with_repos.rst
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ integrity of the pack files in the repository, use the ``--read-data`` flag:
and also that it takes more time than the default ``check``.

Alternatively, use the ``--read-data-subset`` parameter to check only a
subset of the repository pack files at a time. It supports two ways to select a
subset. One selects a specific range of pack files, the other selects a random
percentage of pack files.
subset of the repository pack files at a time. It supports three ways to select a
subset. One selects a specific range of pack files, the second selects a random
percentage of pack files, and the third selects a random subset of pack files
corresponding to the specified amount of data.

Use ``--read-data-subset=n/t`` to check only a subset of the repository pack
files at a time. The parameter takes two values, ``n`` and ``t``. When the check
Expand Down Expand Up @@ -285,3 +285,16 @@ integer:
.. code-block:: console
$ restic -r /srv/restic-repo check --read-data-subset=10%
Use ``--read-data-subset=NS`` to check a randomly chosen subset of the repository pack files.
It takes one parameter, ``NS``, where 'N' is a whole number representing file size and 'S' is the unit
of file size (B/K/M/G/T) of pack files to check. Behind the scenes, the specified size will be converted
to a percentage of the total repository size. The behaviour of the check command following this conversion
will be the same as the percentage option above. For a file size value the following commands may be used:

.. code-block:: console
$ restic -r /srv/restic-repo check --read-data-subset=50M
$ restic -r /srv/restic-repo check --read-data-subset=10G

0 comments on commit 6c84ea1

Please sign in to comment.