Skip to content

Commit

Permalink
Add information about the seed indexes. (#199)
Browse files Browse the repository at this point in the history
This adds the ability to add one or more seed indexes to the `info`
command.

The resulting output will contain the number of chunks from the input
index that are available in the seed, similar to what is already done
for the store.

Additionally, it shows the total size of the unique (non-duplicated)
chunks that are not available from the seed.
This information can be used as a way to roughly estimate the download
size of a particular update. For example if we use:
```
desync info --seed local.caibx -s https://[...]/store update.caibx
```
The output will include the size of the chunks that are required by
`update.caibx` but are not already available in our local seed. These
are the chunks that we must download from the remote store.

Signed-off-by: Ludovico de Nittis <ludovico.denittis@collabora.com>
  • Loading branch information
RyuzakiKK authored Nov 25, 2021
1 parent 8eb0446 commit 8d1be78
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 12 deletions.
63 changes: 51 additions & 12 deletions cmd/desync/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
// infoOptions holds the command-line options accepted by the `info` command.
type infoOptions struct {
// Embedded store options; registered on the command via addStoreOptions.
cmdStoreOptions
// Values of the repeatable --store/-s flag ("source store(s)").
stores []string
// Values of the repeatable --seed flag ("seed indexes").
seeds []string
// Value of the --format/-f flag ("output format, plain or json"); the flag defaults to "json".
printFormat string
}

Expand All @@ -22,9 +23,11 @@ func newInfoCommand(ctx context.Context) *cobra.Command {
cmd := &cobra.Command{
Use: "info <index>",
Short: "Show information about an index",
Long: `Displays information about the provided index, such as number of chunks. If a
Long: `Displays information about the provided index, such as the number of chunks
and the total size of unique chunks that are not available in the seed. If a
store is provided, it'll also show how many of the chunks are present in the
store. Use '-' to read the index from STDIN.`,
store. If one or more seed indexes are provided, the number of chunks available
in the seeds are also shown. Use '-' to read the index from STDIN.`,
Example: ` desync info -s /path/to/local --format=json file.caibx`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
Expand All @@ -34,6 +37,7 @@ store. Use '-' to read the index from STDIN.`,
}
flags := cmd.Flags()
flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)")
flags.StringSliceVar(&opt.seeds, "seed", nil, "seed indexes")
flags.StringVarP(&opt.printFormat, "format", "f", "json", "output format, plain or json")
addStoreOptions(&opt.cmdStoreOptions, flags)
return cmd
Expand All @@ -51,13 +55,31 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error {
}

var results struct {
Total int `json:"total"`
Unique int `json:"unique"`
InStore uint64 `json:"in-store"`
Size uint64 `json:"size"`
ChunkSizeMin uint64 `json:"chunk-size-min"`
ChunkSizeAvg uint64 `json:"chunk-size-avg"`
ChunkSizeMax uint64 `json:"chunk-size-max"`
Total int `json:"total"`
Unique int `json:"unique"`
InStore uint64 `json:"in-store"`
InSeed uint64 `json:"in-seed"`
Size uint64 `json:"size"`
SizeNotInSeed uint64 `json:"dedup-size-not-in-seed"`
ChunkSizeMin uint64 `json:"chunk-size-min"`
ChunkSizeAvg uint64 `json:"chunk-size-avg"`
ChunkSizeMax uint64 `json:"chunk-size-max"`
}

dedupedSeeds := make(map[desync.ChunkID]struct{})
for _, seed := range opt.seeds {
caibxSeed, err := readCaibxFile(seed, opt.cmdStoreOptions)
if err != nil {
return err
}
for _, chunk := range caibxSeed.Chunks {
dedupedSeeds[chunk.ID] = struct{}{}
select {
case <-ctx.Done():
return nil
default:
}
}
}

// Calculate the size of the blob, from the last chunk
Expand All @@ -71,16 +93,31 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error {
results.ChunkSizeAvg = c.Index.ChunkSizeAvg
results.ChunkSizeMax = c.Index.ChunkSizeMax

// Go through each chunk to count and de-dup them with a map
// Go through each chunk from the index to count them, de-dup each chunks
// with a map and calculate the size of the chunks that are not available
// in seed
deduped := make(map[desync.ChunkID]struct{})
for _, chunk := range c.Chunks {
results.Total++
deduped[chunk.ID] = struct{}{}
select {
case <-ctx.Done():
return nil
default:
}

results.Total++
if _, duplicatedChunk := deduped[chunk.ID]; duplicatedChunk {
// This is a duplicated chunk, do not count it again in the seed
continue
}

deduped[chunk.ID] = struct{}{}
if _, isAvailable := dedupedSeeds[chunk.ID]; isAvailable {
// This chunk is available in the seed
results.InSeed++
} else {
// The seed doesn't have this chunk, sum its size
results.SizeNotInSeed += chunk.Size
}
}
results.Unique = len(deduped)

Expand Down Expand Up @@ -118,9 +155,11 @@ func runInfo(ctx context.Context, opt infoOptions, args []string) error {
}
case "plain":
fmt.Println("Blob size:", results.Size)
fmt.Println("Size of deduplicated chunks not in seed:", results.SizeNotInSeed)
fmt.Println("Total chunks:", results.Total)
fmt.Println("Unique chunks:", results.Unique)
fmt.Println("Chunks in store:", results.InStore)
fmt.Println("Chunks in seed:", results.InSeed)
fmt.Println("Chunk size min:", results.ChunkSizeMin)
fmt.Println("Chunk size avg:", results.ChunkSizeAvg)
fmt.Println("Chunk size max:", results.ChunkSizeMax)
Expand Down
39 changes: 39 additions & 0 deletions cmd/desync/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ func TestInfoCommand(t *testing.T) {
"total": 161,
"unique": 131,
"in-store": 131,
"in-seed": 0,
"size": 2097152,
"dedup-size-not-in-seed": 1114112,
"chunk-size-min": 2048,
"chunk-size-avg": 8192,
"chunk-size-max": 32768
Expand All @@ -40,3 +42,40 @@ func TestInfoCommand(t *testing.T) {
require.NoError(t, err)
require.Equal(t, exp, got)
}

// TestInfoCommandWithSeed runs the info command on testdata/blob1.caibx with
// testdata/blob1.store as the store and testdata/blob2.caibx as a seed index,
// then compares the decoded JSON report with the expected values, including
// the seed-related "in-seed" and "dedup-size-not-in-seed" fields.
func TestInfoCommandWithSeed(t *testing.T) {
	expectedOutput := []byte(`{
"total": 161,
"unique": 131,
"in-store": 131,
"in-seed": 124,
"size": 2097152,
"dedup-size-not-in-seed": 80029,
"chunk-size-min": 2048,
"chunk-size-avg": 8192,
"chunk-size-max": 32768
}`)
	exp := make(map[string]interface{})
	err := json.Unmarshal(expectedOutput, &exp)
	require.NoError(t, err)

	cmd := newInfoCommand(context.Background())
	cmd.SetArgs([]string{
		"-s", "testdata/blob1.store",
		"--seed", "testdata/blob2.caibx",
		"testdata/blob1.caibx",
	})
	b := new(bytes.Buffer)

	// Redirect the command's output into the buffer. The stdout writer is
	// declared outside this function and shared with other tests, so put the
	// previous value back when this test finishes to avoid leaking the
	// redirect into whatever runs next.
	origStdout := stdout
	stdout = b
	defer func() { stdout = origStdout }()
	cmd.SetOutput(ioutil.Discard)
	_, err = cmd.ExecuteC()
	require.NoError(t, err)

	// Decode the output and compare to what's expected. Both sides are
	// decoded into generic maps so formatting and key order do not matter.
	got := make(map[string]interface{})
	err = json.Unmarshal(b.Bytes(), &got)
	require.NoError(t, err)
	require.Equal(t, exp, got)
}

0 comments on commit 8d1be78

Please sign in to comment.