From 948348b49339b887f7f30ffae8ddf0caeac7e18c Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Fri, 10 Jul 2020 19:43:15 -0400 Subject: [PATCH 1/5] add ipfs dag stat command --- core/commands/commands_test.go | 2 + core/commands/dag/dag.go | 134 +++++++++++++++++++++++++++++++++ core/commands/root.go | 1 + 3 files changed, 137 insertions(+) diff --git a/core/commands/commands_test.go b/core/commands/commands_test.go index 940b9e0f43f..9584ff70786 100644 --- a/core/commands/commands_test.go +++ b/core/commands/commands_test.go @@ -25,6 +25,7 @@ func TestROCommands(t *testing.T) { "/dag", "/dag/get", "/dag/resolve", + "/dag/stat", "/dns", "/get", "/ls", @@ -99,6 +100,7 @@ func TestCommands(t *testing.T) { "/dag/put", "/dag/import", "/dag/resolve", + "/dag/stat", "/dht", "/dht/findpeer", "/dht/findprovs", diff --git a/core/commands/dag/dag.go b/core/commands/dag/dag.go index 85588be2f14..141d9b4d436 100644 --- a/core/commands/dag/dag.go +++ b/core/commands/dag/dag.go @@ -10,6 +10,7 @@ import ( "time" "github.com/ipfs/go-ipfs/core/commands/cmdenv" + "github.com/ipfs/go-ipfs/core/commands/e" "github.com/ipfs/go-ipfs/core/coredag" iface "github.com/ipfs/interface-go-ipfs-core" @@ -19,6 +20,7 @@ import ( files "github.com/ipfs/go-ipfs-files" ipld "github.com/ipfs/go-ipld-format" mdag "github.com/ipfs/go-merkledag" + traverse "github.com/ipfs/go-merkledag/traverse" ipfspath "github.com/ipfs/go-path" "github.com/ipfs/interface-go-ipfs-core/options" path "github.com/ipfs/interface-go-ipfs-core/path" @@ -54,6 +56,7 @@ to deprecate and replace the existing 'ipfs object' command moving forward. "resolve": DagResolveCmd, "import": DagImportCmd, "export": DagExportCmd, + "stat": DagStatCmd, }, } @@ -668,3 +671,134 @@ The output of blocks happens in strict DAG-traversal, first-seen, order. 
}, }, } + +type DagStat struct { + Size uint64 + NumBlocks int64 +} + +func (s *DagStat) String() string { + return fmt.Sprintf("Size: %d, NumBlocks: %d", s.Size, s.NumBlocks) +} + +var DagStatCmd = &cmds.Command{ + Helptext: cmds.HelpText{ + Tagline: "Gets stats for a DAG", + ShortDescription: ` +'ipfs dag stat' fetches a dag and returns various statistics about the DAG. +Statistics include size and number of blocks. + +Note: This command skips duplicate blocks in reporting both size and the number of blocks +`, + }, + Arguments: []cmds.Argument{ + cmds.StringArg("root", true, false, "CID of a DAG root to get statistics for").EnableStdin(), + }, + Options: []cmds.Option{ + cmds.BoolOption(progressOptionName, "p", "Return progressive data while reading through the DAG").WithDefault("true"), + }, + Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error { + progressive := req.Options[progressOptionName].(bool) + + api, err := cmdenv.GetApi(env, req) + if err != nil { + return err + } + + rp, err := api.ResolvePath(req.Context, path.New(req.Arguments[0])) + if err != nil { + return err + } + + if len(rp.Remainder()) > 0 { + return fmt.Errorf("cannot return size for anything other than a DAG with a root CID") + } + + obj, err := api.Dag().Get(req.Context, rp.Cid()) + if err != nil { + return err + } + + dagstats := &DagStat{} + err = traverse.Traverse(obj, traverse.Options{ + DAG: api.Dag(), + Order: traverse.DFSPre, + Func: func(current traverse.State) error { + stat, err := current.Node.Stat() + if err != nil { + return err + } + + // stat.BlockSize gives the raw size of the data which is what we want here. + // However, node.Stat() is not implemented by any node types other than DagPB, which defines Size() as + // a field stored in the data instead of the raw size of the data itself. + // + // Therefore, we check if stat is defined and if so use `stat.BlockSize` and otherwise just rely on the + // Size() function. 
+ if len(stat.Hash) == 0 { + size, err := current.Node.Size() + if err != nil { + return err + } + dagstats.Size += size + } else { + dagstats.Size += uint64(stat.BlockSize) + } + dagstats.NumBlocks++ + + if progressive { + if err := res.Emit(dagstats); err != nil { + return err + } + } + return nil + }, + ErrFunc: nil, + SkipDuplicates: true, + }) + if err != nil { + return fmt.Errorf("error traversing DAG: %w", err) + } + + if !progressive { + if err := res.Emit(dagstats); err != nil { + return err + } + } + + return nil + }, + Type: DagStat{}, + PostRun: cmds.PostRunMap{ + cmds.CLI: func(res cmds.Response, re cmds.ResponseEmitter) error { + var dagStats *DagStat + for { + v, err := res.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + + out, ok := v.(*DagStat) + if !ok { + return e.TypeErr(out, v) + } + dagStats = out + fmt.Fprintf(os.Stderr, "%v\r", out) + } + return re.Emit(dagStats) + }, + }, + Encoders: cmds.EncoderMap{ + cmds.Text: cmds.MakeTypedEncoder(func(req *cmds.Request, w io.Writer, event *DagStat) error { + _, err := fmt.Fprintf( + w, + "%v", + event, + ) + return err + }), + }, +} diff --git a/core/commands/root.go b/core/commands/root.go index 5d00770f006..b9a8dc40909 100644 --- a/core/commands/root.go +++ b/core/commands/root.go @@ -191,6 +191,7 @@ var rootROSubcommands = map[string]*cmds.Command{ Subcommands: map[string]*cmds.Command{ "get": dag.DagGetCmd, "resolve": dag.DagResolveCmd, + "stat": dag.DagStatCmd, }, }, "resolve": ResolveCmd, From 87ae17d34a6465af142768091eb16ad70312c0d9 Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Tue, 11 Aug 2020 17:10:39 -0400 Subject: [PATCH 2/5] use raw node size instead of working with node fields like Stat --- core/commands/dag/dag.go | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/core/commands/dag/dag.go b/core/commands/dag/dag.go index 141d9b4d436..847ccaa2ace 100644 --- a/core/commands/dag/dag.go +++ b/core/commands/dag/dag.go @@ 
-724,26 +724,7 @@ Note: This command skips duplicate blocks in reporting both size and the number DAG: api.Dag(), Order: traverse.DFSPre, Func: func(current traverse.State) error { - stat, err := current.Node.Stat() - if err != nil { - return err - } - - // stat.BlockSize gives the raw size of the data which is what we want here. - // However, node.Stat() is not implemented by any node types other than DagPB, which defines Size() as - // a field stored in the data instead of the raw size of the data itself. - // - // Therefore, we check if stat is defined and if so use `stat.BlockSize` and otherwise just rely on the - // Size() function. - if len(stat.Hash) == 0 { - size, err := current.Node.Size() - if err != nil { - return err - } - dagstats.Size += size - } else { - dagstats.Size += uint64(stat.BlockSize) - } + dagstats.Size += uint64(len(current.Node.RawData())) dagstats.NumBlocks++ if progressive { From 640ed77f8ae5a0d1d560d47d221551ddb70b709f Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Mon, 17 Aug 2020 02:01:38 -0400 Subject: [PATCH 3/5] fixed dag stat command default from string "true" to bool true --- core/commands/dag/dag.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/commands/dag/dag.go b/core/commands/dag/dag.go index 847ccaa2ace..c055151b726 100644 --- a/core/commands/dag/dag.go +++ b/core/commands/dag/dag.go @@ -695,7 +695,7 @@ Note: This command skips duplicate blocks in reporting both size and the number cmds.StringArg("root", true, false, "CID of a DAG root to get statistics for").EnableStdin(), }, Options: []cmds.Option{ - cmds.BoolOption(progressOptionName, "p", "Return progressive data while reading through the DAG").WithDefault("true"), + cmds.BoolOption(progressOptionName, "p", "Return progressive data while reading through the DAG").WithDefault(true), }, Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error { progressive := req.Options[progressOptionName].(bool) From 
d702fe71806de7cb02c39c6bbd1824e66bbe4af6 Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Mon, 17 Aug 2020 04:05:58 -0400 Subject: [PATCH 4/5] sharness: added ipfs dag stat tests --- core/commands/dag/dag.go | 2 +- test/sharness/t0053-dag.sh | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/core/commands/dag/dag.go b/core/commands/dag/dag.go index c055151b726..5ec3be8ee1e 100644 --- a/core/commands/dag/dag.go +++ b/core/commands/dag/dag.go @@ -776,7 +776,7 @@ Note: This command skips duplicate blocks in reporting both size and the number cmds.Text: cmds.MakeTypedEncoder(func(req *cmds.Request, w io.Writer, event *DagStat) error { _, err := fmt.Fprintf( w, - "%v", + "%v\n", event, ) return err diff --git a/test/sharness/t0053-dag.sh b/test/sharness/t0053-dag.sh index de35a58fd1c..2225f79ec4d 100755 --- a/test/sharness/t0053-dag.sh +++ b/test/sharness/t0053-dag.sh @@ -268,6 +268,41 @@ test_dag_cmd() { test_cmp resolve_obj_exp resolve_obj && test_cmp resolve_data_exp resolve_data ' + + test_expect_success "dag stat of simple IPLD object" ' + ipfs dag stat $NESTED_HASH > actual_stat_inner_ipld_obj && + echo "Size: 15, NumBlocks: 1" > exp_stat_inner_ipld_obj && + test_cmp exp_stat_inner_ipld_obj actual_stat_inner_ipld_obj && + ipfs dag stat $HASH > actual_stat_ipld_obj && + echo "Size: 61, NumBlocks: 2" > exp_stat_ipld_obj && + test_cmp exp_stat_ipld_obj actual_stat_ipld_obj + ' + + test_expect_success "dag stat of simple UnixFS object" ' + BASIC_UNIXFS=$(echo "1234" | ipfs add --pin=false -q) && + ipfs dag stat $BASIC_UNIXFS > actual_stat_basic_unixfs && + echo "Size: 13, NumBlocks: 1" > exp_stat_basic_unixfs && + test_cmp exp_stat_basic_unixfs actual_stat_basic_unixfs + ' + + # The multiblock file is just 10000000 copies of the number 1 + # As most of its data is replicated it should have a small number of blocks + test_expect_success "dag stat of multiblock UnixFS object" ' + MULTIBLOCK_UNIXFS=$(printf "1%.0s" 
{1..10000000} | ipfs add --pin=false -q) && + ipfs dag stat $MULTIBLOCK_UNIXFS > actual_stat_multiblock_unixfs && + echo "Size: 302582, NumBlocks: 3" > exp_stat_multiblock_unixfs && + test_cmp exp_stat_multiblock_unixfs actual_stat_multiblock_unixfs + ' + + test_expect_success "dag stat of directory of UnixFS objects" ' + mkdir -p unixfsdir && + echo "1234" > unixfsdir/small.txt + printf "1%.0s" {1..10000000} > unixfsdir/many1s.txt && + DIRECTORY_UNIXFS=$(ipfs add -r --pin=false -Q unixfsdir) && + ipfs dag stat $DIRECTORY_UNIXFS > actual_stat_directory_unixfs && + echo "Size: 302705, NumBlocks: 5" > exp_stat_directory_unixfs && + test_cmp exp_stat_directory_unixfs actual_stat_directory_unixfs + ' } # should work offline From 6a822fe7a94a4bab99db6645e3985f403f015bb1 Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Mon, 17 Aug 2020 14:06:28 -0400 Subject: [PATCH 5/5] use sessions for ipfs dag stat --- core/commands/dag/dag.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/commands/dag/dag.go b/core/commands/dag/dag.go index 5ec3be8ee1e..4df6571cc83 100644 --- a/core/commands/dag/dag.go +++ b/core/commands/dag/dag.go @@ -714,14 +714,15 @@ Note: This command skips duplicate blocks in reporting both size and the number return fmt.Errorf("cannot return size for anything other than a DAG with a root CID") } - obj, err := api.Dag().Get(req.Context, rp.Cid()) + nodeGetter := mdag.NewSession(req.Context, api.Dag()) + obj, err := nodeGetter.Get(req.Context, rp.Cid()) if err != nil { return err } dagstats := &DagStat{} err = traverse.Traverse(obj, traverse.Options{ - DAG: api.Dag(), + DAG: nodeGetter, Order: traverse.DFSPre, Func: func(current traverse.State) error { dagstats.Size += uint64(len(current.Node.RawData()))