diff --git a/cmd/lassie/fetch.go b/cmd/lassie/fetch.go index bd423c6f..423d869f 100644 --- a/cmd/lassie/fetch.go +++ b/cmd/lassie/fetch.go @@ -48,6 +48,19 @@ var fetchFlags = []cli.Flag{ return nil }, }, + &cli.StringFlag{ + Name: "entity-bytes", + Usage: "describes the byte range to consider when selecting the blocks from a sharded file." + + " Valid values should be of the form from:to, where from and to are byte offsets and to may be '*'", + DefaultText: "defaults to the entire file, 0:*", + Action: func(cctx *cli.Context, v string) error { + if _, err := types.ParseByteRange(v); err != nil { + return fmt.Errorf("invalid entity-bytes parameter, must be of the form from:to," + + " where from and to are byte offsets and to may be '*'") + } + return nil + }, + }, FlagIPNIEndpoint, FlagEventRecorderAuth, FlagEventRecorderInstanceId, @@ -80,6 +93,7 @@ func fetchAction(cctx *cli.Context) error { dataWriter := cctx.App.Writer dagScope := cctx.String("dag-scope") + entityBytes := cctx.String("entity-bytes") tempDir := cctx.String("tempdir") progress := cctx.Bool("progress") @@ -113,6 +127,7 @@ func fetchAction(cctx *cli.Context) error { rootCid, path, dagScope, + entityBytes, tempDir, progress, outfile, @@ -190,6 +205,7 @@ type fetchRunFunc func( rootCid cid.Cid, path string, dagScope string, + entityBytes string, tempDir string, progress bool, outfile string, @@ -209,6 +225,7 @@ func defaultFetchRun( rootCid cid.Cid, path string, dagScope string, + entityBytes string, tempDir string, progress bool, outfile string, @@ -260,7 +277,12 @@ func defaultFetchRun( } }, false) - request, err := types.NewRequestForPath(carStore, rootCid, path, types.DagScope(dagScope)) + byteRange, _ := types.ParseByteRange(entityBytes) + var br *types.ByteRange + if !byteRange.IsDefault() { + br = &byteRange + } + request, err := types.NewRequestForPath(carStore, rootCid, path, types.DagScope(dagScope), br) if err != nil { return err } diff --git a/cmd/lassie/fetch_test.go b/cmd/lassie/fetch_test.go index 2ca6f5aa..15ff0f85 100644 --- a/cmd/lassie/fetch_test.go +++ b/cmd/lassie/fetch_test.go @@ -28,11 +28,12 @@ func TestFetchCommandFlags(t *testing.T) { { name: "with default args", args: []string{"fetch", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4"}, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { // fetch specific params require.Equal(t, "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", rootCid.String()) require.Equal(t, "", path) require.Equal(t, string(types.DagScopeAll), dagScope) + require.Empty(t, entityBytes) require.Equal(t, false, progress) require.Equal(t, "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4.car", outfile) @@ -70,7 +71,7 @@ func TestFetchCommandFlags(t *testing.T) { "fetch", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4/birb.mp4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, "/birb.mp4", path) return nil }, @@ -83,7 +84,7 @@ func TestFetchCommandFlags(t *testing.T) { "entity", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, string(types.DagScopeEntity), dagScope) return nil }, @@ -96,11 +97,50 @@ func TestFetchCommandFlags(t *testing.T) { "block", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, string(types.DagScopeBlock), dagScope) return nil }, }, + { + name: "with entity-bytes 0:*", + args: []string{ + "fetch", + "--entity-bytes", + "0:*", + "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", + }, + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { + require.Equal(t, "0:*", entityBytes) + return nil + }, + }, + { + name: "with entity-bytes 0:10", + args: []string{ + "fetch", + "--entity-bytes", + "0:10", + "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", + }, + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { + require.Equal(t, "0:10", entityBytes) + return nil + }, + }, + { + name: "with entity-bytes 1000:20000", + args: []string{ + "fetch", + "--entity-bytes", + "1000:20000", + "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", + }, + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { + require.Equal(t, "1000:20000", entityBytes) + return nil + }, + }, { name: "with progress", args: []string{ @@ -108,7 +148,7 @@ func TestFetchCommandFlags(t *testing.T) { "--progress", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, true, progress) return nil }, @@ -121,7 +161,7 @@ func TestFetchCommandFlags(t *testing.T) { "myfile", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, "myfile", outfile) return nil }, @@ -134,7 +174,7 @@ func TestFetchCommandFlags(t *testing.T) { "/ip4/127.0.0.1/tcp/5000/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.IsType(t, &retriever.DirectCandidateFinder{}, lCfg.Finder, "finder should be a DirectCandidateFinder when providers are specified") return nil }, @@ -147,7 +187,7 @@ func TestFetchCommandFlags(t *testing.T) { "https://cid.contact", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.IsType(t, &indexerlookup.IndexerCandidateFinder{}, lCfg.Finder, "finder should be an IndexerCandidateFinder when providing an ipni endpoint") return nil }, @@ -170,7 +210,7 @@ func TestFetchCommandFlags(t *testing.T) { "/mytmpdir", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, "/mytmpdir", tempDir) return nil }, @@ -183,7 +223,7 @@ func TestFetchCommandFlags(t *testing.T) { "30s", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, 30*time.Second, lCfg.ProviderTimeout) return nil }, @@ -196,7 +236,7 @@ func TestFetchCommandFlags(t *testing.T) { "30s", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, 30*time.Second, lCfg.GlobalTimeout) return nil }, @@ -209,7 +249,7 @@ func TestFetchCommandFlags(t *testing.T) { "bitswap,graphsync", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, []multicodec.Code{multicodec.TransportBitswap, multicodec.TransportGraphsyncFilecoinv1}, lCfg.Protocols) return nil }, @@ -222,7 +262,7 @@ func TestFetchCommandFlags(t *testing.T) { "12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4,12D3KooWPNbkEgjdBNeaCGpsgCrPRETe4uBZf1ShFXStobdN18ys", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { p1, err := peer.Decode("12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4") require.NoError(t, err) p2, err := peer.Decode("12D3KooWPNbkEgjdBNeaCGpsgCrPRETe4uBZf1ShFXStobdN18ys") @@ -241,7 +281,7 @@ func TestFetchCommandFlags(t *testing.T) { "10", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, 10, lCfg.BitswapConcurrency) return nil }, @@ -254,7 +294,7 @@ func TestFetchCommandFlags(t *testing.T) { "https://myeventrecorder.com/v1/retrieval-events", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, "https://myeventrecorder.com/v1/retrieval-events", erCfg.EndpointURL) return nil }, @@ -267,7 +307,7 @@ func TestFetchCommandFlags(t *testing.T) { "secret", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, "secret", erCfg.EndpointAuthorization) return nil }, @@ -280,7 +320,7 @@ func TestFetchCommandFlags(t *testing.T) { "myinstanceid", "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, - assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { + assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { require.Equal(t, "myinstanceid", erCfg.InstanceID) return nil }, @@ -312,6 +352,6 @@ func TestFetchCommandFlags(t *testing.T) { } } -func noopRun(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, tempDir string, progress bool, outfile string) error { +func noopRun(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { return nil } diff --git a/go.mod b/go.mod index 447b2068..2ecc7243 100644 --- a/go.mod +++ b/go.mod @@ -6,12 +6,12 @@ require ( github.com/benbjohnson/clock v1.3.5 github.com/cespare/xxhash/v2 v2.2.0 github.com/dustin/go-humanize v1.0.1 - github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7 + github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7.0.20230818040822-432a10ec7e4a github.com/filecoin-project/go-retrieval-types v1.2.0 github.com/filecoin-project/go-state-types v0.10.0 github.com/google/uuid v1.3.0 github.com/hannahhoward/go-pubsub v1.0.0 - github.com/ipfs/boxo v0.10.0 + github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf github.com/ipfs/go-block-format v0.1.2 github.com/ipfs/go-cid v0.4.1 github.com/ipfs/go-datastore v0.6.0 @@ -22,10 +22,11 @@ require ( github.com/ipfs/go-ipfs-exchange-interface v0.2.0 github.com/ipfs/go-ipld-format v0.5.0 github.com/ipfs/go-log/v2 v2.5.1 - github.com/ipfs/go-unixfsnode v1.7.1 - github.com/ipld/go-car/v2 v2.10.1 + github.com/ipfs/go-unixfsnode v1.7.4 + github.com/ipld/go-car/v2 v2.11.0 github.com/ipld/go-codec-dagpb v1.6.0 - github.com/ipld/go-ipld-prime v0.20.1-0.20230329011551-5056175565b0 + github.com/ipld/go-ipld-prime v0.21.0 + github.com/ipld/ipld/specs v0.0.0-20230816230151-73f8fbea1783 github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff github.com/libp2p/go-libp2p v0.30.0 github.com/libp2p/go-libp2p-routing-helpers v0.7.0 @@ -72,12 +73,13 @@ require ( github.com/golang/mock v1.6.0 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/gopacket v1.1.19 // indirect - github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f // indirect + github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/hannahhoward/cbor-gen-for v0.0.0-20230214144701-5d17c9d5243c // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.5 // indirect github.com/huin/goupnp v1.2.0 // indirect github.com/ipfs/bbloom v0.0.4 // indirect github.com/ipfs/go-bitfield v1.1.0 // indirect @@ -131,17 +133,18 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/polydawn/refmt v0.89.0 // indirect - github.com/prometheus/client_golang v1.16.0 // indirect + github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.4.0 // indirect - github.com/prometheus/common v0.44.0 // indirect - github.com/prometheus/procfs v0.11.1 // indirect + github.com/prometheus/common v0.42.0 // indirect + github.com/prometheus/procfs v0.9.0 // indirect github.com/quic-go/qpack v0.4.0 // indirect github.com/quic-go/qtls-go1-20 v0.3.2 // indirect - github.com/quic-go/quic-go v0.38.0 // indirect + github.com/quic-go/quic-go v0.37.6 // indirect github.com/quic-go/webtransport-go v0.5.3 // indirect github.com/raulk/go-watchdog v1.3.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/warpfork/go-testmark v0.12.1 // indirect github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa // indirect github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect @@ -160,7 +163,7 @@ require ( golang.org/x/text v0.12.0 // indirect golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect - google.golang.org/protobuf v1.31.0 // indirect + google.golang.org/protobuf v1.30.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.2.1 // indirect ) diff --git a/go.sum b/go.sum index 17f47472..bbde5869 100644 --- a/go.sum +++ b/go.sum @@ -97,8 +97,8 @@ github.com/filecoin-project/go-commp-utils v0.1.3/go.mod h1:3ENlD1pZySaUout0p9AN github.com/filecoin-project/go-commp-utils/nonffi v0.0.0-20220905160352-62059082a837/go.mod h1:e2YBjSblNVoBckkbv3PPqsq71q98oFkFqL7s1etViGo= github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03 h1:2pMXdBnCiXjfCYx/hLqFxccPoqsSveQFxVLvNxy9bus= github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03/go.mod h1:+viYnvGtUTgJRdy6oaeF4MTFKAfatX071MPDPBL11EQ= -github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7 h1:v+zJS5B6pA3ptWZS4t8tbt1Hz9qENnN4nVr1w99aSWc= -github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7/go.mod h1:V3Y4KbttaCwyg1gwkP7iai8CbQx4mZUGjd3h9GZWLKE= +github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7.0.20230818040822-432a10ec7e4a h1:v6ZyQK5U965p+6QHjbA2QFt9SE4kgQiA+qR2uSCVXI4= +github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7.0.20230818040822-432a10ec7e4a/go.mod h1:P0/wHIz9WdKSrLg+D9Ue9bc09iAF8lDW8EXDYRmUP00= github.com/filecoin-project/go-ds-versioning v0.1.2 h1:to4pTadv3IeV1wvgbCbN6Vqd+fu+7tveXgv/rCEZy6w= github.com/filecoin-project/go-ds-versioning v0.1.2/go.mod h1:C9/l9PnB1+mwPa26BBVpCjG/XQCB0yj/q5CK2J8X1I4= github.com/filecoin-project/go-fil-commcid v0.0.0-20200716160307-8f644712406f/go.mod h1:Eaox7Hvus1JgPrL5+M3+h7aSPHc0cVqpSxA+TxIEpZQ= @@ -127,7 +127,7 @@ github.com/flynn/noise v1.0.0 h1:DlTHqmzmvcEiKj+4RYo/imoswx/4r6iBlCMfVtrMXpQ= github.com/flynn/noise v1.0.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= github.com/francoispqt/gojay v1.2.13 h1:d2m3sFjloqoIUQU3TsHBgj6qg/BVGlTBeHDUmyJnXKk= github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY= -github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= @@ -190,8 +190,8 @@ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXi github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f h1:pDhu5sgp8yJlEF/g6osliIIpF9K4F5jvkULXa4daRDQ= -github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= +github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b h1:h9U78+dx9a4BKdQkBBos92HalKpaGKHrp+3Uo6yTodo= +github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= @@ -222,13 +222,15 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/golang-lru/v2 v2.0.5 h1:wW7h1TG88eUIJ2i69gaE3uNVtEPIagzhGvHgwfx2Vm4= +github.com/hashicorp/golang-lru/v2 v2.0.5/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/huin/goupnp v1.2.0 h1:uOKW26NG1hsSSbXIZ1IR7XP9Gjd1U8pnLaCMgntmkmY= github.com/huin/goupnp v1.2.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.10.0 h1:tdDAxq8jrsbRkYoF+5Rcqyeb91hgWe2hp7iLu7ORZLY= -github.com/ipfs/boxo v0.10.0/go.mod h1:Fg+BnfxZ0RPzR0nOodzdIq3A7KgoWAOWsEIImrIQdBM= +github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf h1:toUvJ0yELWjrVmFX8AdriAfzl/EtqvYrpkfEniAJiFo= +github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf/go.mod h1:8IfDmp+FzFGcF4zjAgHMVPpwYw4AjN9ePEzDfkaYJ1w= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= @@ -308,16 +310,18 @@ github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j github.com/ipfs/go-peertaskqueue v0.8.1 h1:YhxAs1+wxb5jk7RvS0LHdyiILpNmRIRnZVztekOF0pg= github.com/ipfs/go-peertaskqueue v0.8.1/go.mod h1:Oxxd3eaK279FxeydSPPVGHzbwVeHjatZ2GA8XD+KbPU= github.com/ipfs/go-unixfs v0.4.5 h1:wj8JhxvV1G6CD7swACwSKYa+NgtdWC1RUit+gFnymDU= -github.com/ipfs/go-unixfsnode v1.7.1 h1:RRxO2b6CSr5UQ/kxnGzaChTjp5LWTdf3Y4n8ANZgB/s= -github.com/ipfs/go-unixfsnode v1.7.1/go.mod h1:PVfoyZkX1B34qzT3vJO4nsLUpRCyhnMuHBznRcXirlk= +github.com/ipfs/go-unixfsnode v1.7.4 h1:iLvKyAVKUYOIAW2t4kDYqsT7VLGj31eXJE2aeqGfbwA= +github.com/ipfs/go-unixfsnode v1.7.4/go.mod h1:PVfoyZkX1B34qzT3vJO4nsLUpRCyhnMuHBznRcXirlk= github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= -github.com/ipld/go-car/v2 v2.10.1 h1:MRDqkONNW9WRhB79u+Z3U5b+NoN7lYA5B8n8qI3+BoI= -github.com/ipld/go-car/v2 v2.10.1/go.mod h1:sQEkXVM3csejlb1kCCb+vQ/pWBKX9QtvsrysMQjOgOg= +github.com/ipld/go-car/v2 v2.11.0 h1:lkAPwbbTFqbdfawgm+bfmFc8PjGC7D12VcaLXPCLNfM= +github.com/ipld/go-car/v2 v2.11.0/go.mod h1:aDszqev0zjtU8l96g4lwXHaU9bzArj56Y7eEN0q/xqA= github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc= github.com/ipld/go-codec-dagpb v1.6.0/go.mod h1:ANzFhfP2uMJxRBr8CE+WQWs5UsNa0pYtmKZ+agnUw9s= -github.com/ipld/go-ipld-prime v0.20.1-0.20230329011551-5056175565b0 h1:iJTl9tx5DEsnKpppX5PmfdoQ3ITuBmkh3yyEpHWY2SI= -github.com/ipld/go-ipld-prime v0.20.1-0.20230329011551-5056175565b0/go.mod h1:wmOtdy70ajP48iZITH8uLsGJVMqA4EJM61/bSfYYGhs= +github.com/ipld/go-ipld-prime v0.21.0 h1:n4JmcpOlPDIxBcY037SVfpd1G+Sj1nKZah0m6QH9C2E= +github.com/ipld/go-ipld-prime v0.21.0/go.mod h1:3RLqy//ERg/y5oShXXdx5YIp50cFGOanyMctpPjsvxQ= github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd h1:gMlw/MhNr2Wtp5RwGdsW23cs+yCuj9k2ON7i9MiJlRo= +github.com/ipld/ipld/specs v0.0.0-20230816230151-73f8fbea1783 h1:09+y1AqnODibi/a6xvuwEiEbx51XZ21pWzKgeezfNII= +github.com/ipld/ipld/specs v0.0.0-20230816230151-73f8fbea1783/go.mod h1:AfGlAr20WOjV5PyCowEnGY3pAm5x5i+o0R8IUeir6cs= github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff h1:xbKrIvnpQkbF8iHPk/HGcegsypCDpcXWHhzBCLyCWf8= github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff/go.mod h1:paYP9U4N3/vOzGCuN9kU972vtvw9JUcQjOKyiCFGwRk= github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52 h1:QG4CGBqCeuBo6aZlGAamSkxWdgWfZGeE49eUOWJPA4c= @@ -491,31 +495,31 @@ github.com/polydawn/refmt v0.0.0-20190809202753-05966cbd336a/go.mod h1:uIp+gprXx github.com/polydawn/refmt v0.89.0 h1:ADJTApkvkeBZsN0tBTx8QjpD9JkmxbKp0cxfr9qszm4= github.com/polydawn/refmt v0.89.0/go.mod h1:/zvteZs/GwLtCgZ4BL6CBsk9IKIlexP43ObX9AxTqTw= github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= -github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw= +github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= -github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= +github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI= -github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY= +github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= +github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= github.com/quic-go/qpack v0.4.0 h1:Cr9BXA1sQS2SmDUWjSofMPNKmvF6IiIfDRmgU0w1ZCo= github.com/quic-go/qpack v0.4.0/go.mod h1:UZVnYIfi5GRk+zI9UMaCPsmZ2xKJP7XBUvVyT1Knj9A= github.com/quic-go/qtls-go1-20 v0.3.2 h1:rRgN3WfnKbyik4dBV8A6girlJVxGand/d+jVKbQq5GI= github.com/quic-go/qtls-go1-20 v0.3.2/go.mod h1:X9Nh97ZL80Z+bX/gUXMbipO6OxdiDi58b/fMC9mAL+k= -github.com/quic-go/quic-go v0.38.0 h1:T45lASr5q/TrVwt+jrVccmqHhPL2XuSyoCLVCpfOSLc= -github.com/quic-go/quic-go v0.38.0/go.mod h1:MPCuRq7KBK2hNcfKj/1iD1BGuN3eAYMeNxp3T42LRUg= +github.com/quic-go/quic-go v0.37.6 h1:2IIUmQzT5YNxAiaPGjs++Z4hGOtIR0q79uS5qE9ccfY= +github.com/quic-go/quic-go v0.37.6/go.mod h1:YsbH1r4mSHPJcLF4k4zruUkLBqctEMBDR6VPvcYjIsU= github.com/quic-go/webtransport-go v0.5.3 h1:5XMlzemqB4qmOlgIus5zB45AcZ2kCgCy2EptUrfOPWU= github.com/quic-go/webtransport-go v0.5.3/go.mod h1:OhmmgJIzTTqXK5xvtuX0oBpLV2GkLWNDA+UeTGJXErU= github.com/raulk/go-watchdog v1.3.0 h1:oUmdlHxdkXRJlwfG0O9omj8ukerm8MEQavSiDTEtBsk= github.com/raulk/go-watchdog v1.3.0/go.mod h1:fIvOnLbF0b0ZwkB9YU4mOW9Did//4vPZtDqv66NfsMU= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= @@ -575,7 +579,9 @@ github.com/urfave/cli/v2 v2.24.4 h1:0gyJJEBYtCV87zI/x2nZCPyDxD51K6xM8SkwjHFCNEU= github.com/urfave/cli/v2 v2.24.4/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= -github.com/warpfork/go-testmark v0.11.0 h1:J6LnV8KpceDvo7spaNU4+DauH2n1x+6RaO2rJrmpQ9U= +github.com/warpfork/go-fsx v0.3.0/go.mod h1:oTACCMj+Zle+vgVa5SAhGAh7WksYpLgGUCKEAVc+xPg= +github.com/warpfork/go-testmark v0.12.1 h1:rMgCpJfwy1sJ50x0M0NgyphxYYPMOODIJHhsXyEHU0s= +github.com/warpfork/go-testmark v0.12.1/go.mod h1:kHwy7wfvGSPh1rQJYKayD4AbtNaeyZdcGi9tNJTaa5Y= github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ= @@ -883,8 +889,8 @@ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/pkg/internal/itest/client_retrieval_test.go b/pkg/internal/itest/client_retrieval_test.go index 60096128..e14d64c0 100644 --- a/pkg/internal/itest/client_retrieval_test.go +++ b/pkg/internal/itest/client_retrieval_test.go @@ -11,11 +11,12 @@ import ( retrievaltypes "github.com/filecoin-project/go-retrieval-types" "github.com/filecoin-project/go-state-types/big" "github.com/filecoin-project/lassie/pkg/internal/itest/mocknet" - "github.com/filecoin-project/lassie/pkg/net/client" + "github.com/filecoin-project/lassie/pkg/retriever/graphsync/client" "github.com/ipfs/go-cid" "github.com/ipfs/go-datastore" "github.com/ipfs/go-datastore/namespace" dss "github.com/ipfs/go-datastore/sync" + "github.com/ipfs/go-graphsync" "github.com/ipfs/go-graphsync/storeutil" bstore "github.com/ipfs/go-ipfs-blockstore" "github.com/ipfs/go-unixfsnode" @@ -129,6 +130,7 @@ func runRetrieval(t *testing.T, ctx context.Context, mrn *mocknet.MockRetrievalN proposal, selectorparse.CommonSelector_ExploreAllRecursively, 0, + func(rp graphsync.ResponseProgress) {}, subscriberLocal, shutdown, ) diff --git a/pkg/internal/itest/direct_fetch_test.go b/pkg/internal/itest/direct_fetch_test.go index c15d4551..a03fa60e 100644 --- a/pkg/internal/itest/direct_fetch_test.go +++ b/pkg/internal/itest/direct_fetch_test.go @@ -116,7 +116,7 @@ func TestDirectFetch(t *testing.T) { }() outCar, err := storage.NewReadableWritable(outFile, []cid.Cid{srcData1.Root}, carv2.WriteAsCarV1(true)) req.NoError(err) - request, err := types.NewRequestForPath(outCar, srcData1.Root, "", types.DagScopeAll) + request, err := types.NewRequestForPath(outCar, srcData1.Root, "", types.DagScopeAll, nil) req.NoError(err) _, err = lassie.Fetch(ctx, request, func(types.RetrievalEvent) {}) req.NoError(err) diff --git a/pkg/internal/itest/http_fetch_test.go b/pkg/internal/itest/http_fetch_test.go index 0a7eea7d..65b10a98 100644 --- a/pkg/internal/itest/http_fetch_test.go +++ b/pkg/internal/itest/http_fetch_test.go @@ -64,21 +64,20 @@ func TestHttpFetch(t *testing.T) { wrapPath := "/want2/want1/want0" testCases := []struct { - name string - graphsyncRemotes int - bitswapRemotes int - httpRemotes int - disableGraphsync bool - expectFail bool - expectUncleanEnd bool - expectUnauthorized bool - modifyHttpConfig func(httpserver.HttpServerConfig) httpserver.HttpServerConfig - generate func(*testing.T, io.Reader, []testpeer.TestPeer) []unixfs.DirEntry - paths []string - setHeader headerSetter - modifyQueries []queryModifier - validateBodies []bodyValidator - lassieOpts lassieOptsGen + name string + graphsyncRemotes int + bitswapRemotes int + httpRemotes int + disableGraphsync bool + expectStatusCode int + expectUncleanEnd bool + modifyHttpConfig func(httpserver.HttpServerConfig) httpserver.HttpServerConfig + generate func(*testing.T, io.Reader, []testpeer.TestPeer) []unixfs.DirEntry + paths []string + setHeader headerSetter + modifyQueries []queryModifier + validateBodies []bodyValidator + lassieOpts lassieOptsGen }{ { name: "graphsync large sharded file", @@ -690,6 +689,54 @@ func TestHttpFetch(t *testing.T) { unixfs.CompareDirEntries(t, srcData.Children[1].Children[1].Children[1], gotDir) }}, }, + { + name: "graphsync nested file, with path plus extra, unclean end (path unfulfilled)", + graphsyncRemotes: 1, + generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { + lsys := remotes[0].LinkSystem + return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, remotes[0].LinkSystem, rndReader, 1024), wrapPath, false)} + }, + paths: []string{wrapPath + "/more/not/here"}, + modifyQueries: []queryModifier{entityQuery}, + validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { + // no validation, Go's body parser will fail on the unclean end and we're unlikely + // to have enough content quick enough to parse before it encounters the unclean end + // and returns nothing + }}, + expectUncleanEnd: true, + }, + { + name: "bitswap nested file, with path plus extra, unclean end (path unfulfilled)", + bitswapRemotes: 1, + generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { + lsys := remotes[0].LinkSystem + return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, remotes[0].LinkSystem, rndReader, 1024), wrapPath, false)} + }, + paths: []string{wrapPath + "/more/not/here"}, + modifyQueries: []queryModifier{entityQuery}, + validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { + // no validation, Go's body parser will fail on the unclean end and we're unlikely + // to have enough content quick enough to parse before it encounters the unclean end + // and returns nothing + }}, + expectUncleanEnd: true, + }, + { + name: "http nested file, with path plus extra, unclean end (path unfulfilled)", + httpRemotes: 1, + generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { + lsys := remotes[0].LinkSystem + return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, remotes[0].LinkSystem, rndReader, 1024), wrapPath, false)} + }, + paths: []string{wrapPath + "/more/not/here"}, + modifyQueries: []queryModifier{entityQuery}, + validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { + // no validation, Go's body parser will fail on the unclean end and we're unlikely + // to have enough content quick enough to parse before it encounters the unclean end + // and returns nothing + }}, + expectUncleanEnd: true, + }, { // A very contrived example - we spread the content generated for this test across 4 peers, // then we also make sure the root is in all of them, so the CandidateFinder will return them @@ -765,7 +812,7 @@ func TestHttpFetch(t *testing.T) { name: "two separate, parallel graphsync retrievals, with graphsync disabled", graphsyncRemotes: 2, disableGraphsync: true, - expectFail: true, + expectStatusCode: http.StatusGatewayTimeout, generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { return []unixfs.DirEntry{ unixfs.GenerateFile(t, remotes[0].LinkSystem, rndReader, 4<<20), @@ -929,7 +976,7 @@ func TestHttpFetch(t *testing.T) { cfg.AccessToken = "super-secret" return cfg }, - expectUnauthorized: true, + expectStatusCode: http.StatusUnauthorized, }, { name: "with access token - allows requests with authorization header", @@ -945,7 +992,7 @@ func TestHttpFetch(t *testing.T) { header.Set("Authorization", "Bearer super-secret") header.Add("Accept", "application/vnd.ipld.car") }, - expectUnauthorized: false, + expectStatusCode: http.StatusOK, // i.e. not StatusUnauthorized }, } @@ -979,7 +1026,8 @@ func TestHttpFetch(t *testing.T) { if testCase.lassieOpts != nil { customOpts = testCase.lassieOpts(t, mrn) } - opts := append([]lassie.LassieOption{lassie.WithProviderTimeout(20 * time.Second), + opts := append([]lassie.LassieOption{ + lassie.WithProviderTimeout(20 * time.Second), lassie.WithHost(mrn.Self), lassie.WithFinder(mrn.Finder), }, customOpts...) @@ -1058,10 +1106,8 @@ func TestHttpFetch(t *testing.T) { } for i, resp := range responses { - if testCase.expectFail { - req.Equal(http.StatusGatewayTimeout, resp.StatusCode) - } else if testCase.expectUnauthorized { - req.Equal(http.StatusUnauthorized, resp.StatusCode) + if testCase.expectStatusCode != 0 && testCase.expectStatusCode != http.StatusOK { + req.Equal(testCase.expectStatusCode, resp.StatusCode) } else { if resp.StatusCode != http.StatusOK { body, err := io.ReadAll(resp.Body) diff --git a/pkg/internal/itest/linksystemutil/linksystemblockstore.go b/pkg/internal/itest/linksystemutil/linksystemblockstore.go new file mode 100644 index 00000000..9a93223d --- /dev/null +++ b/pkg/internal/itest/linksystemutil/linksystemblockstore.go @@ -0,0 +1,89 @@ +package linksystemutil + +import ( + "bytes" + "context" + "errors" + "io" + + "github.com/ipfs/boxo/blockstore" + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "github.com/ipld/go-ipld-prime/linking" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" +) + +var _ blockstore.Blockstore = (*LinkSystemBlockstore)(nil) + +type LinkSystemBlockstore struct { + lsys linking.LinkSystem +} + +func NewLinkSystemBlockstore(lsys linking.LinkSystem) *LinkSystemBlockstore { + return &LinkSystemBlockstore{lsys} +} + +func (lsbs *LinkSystemBlockstore) DeleteBlock(ctx context.Context, c cid.Cid) error { + return errors.New("not supported") +} + +func (lsbs *LinkSystemBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) { + _, err := lsbs.lsys.StorageReadOpener(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: c}) + if err != nil { + return false, err + } + return true, nil +} + +func (lsbs *LinkSystemBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.Block, error) { + rdr, err := lsbs.lsys.StorageReadOpener(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: c}) + if err != nil { + return nil, err + } + var buf bytes.Buffer + _, err = io.Copy(&buf, rdr) + if err != nil { + return nil, err + } + return blocks.NewBlockWithCid(buf.Bytes(), c) +} + +func (lsbs *LinkSystemBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { + rdr, err := lsbs.lsys.StorageReadOpener(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: c}) + if err != nil { + return 0, err + } + i, err := io.Copy(io.Discard, rdr) + if err != nil { + return 0, err + } + return int(i), nil +} + +func (lsbs *LinkSystemBlockstore) Put(ctx context.Context, blk blocks.Block) error { + w, wc, err := lsbs.lsys.StorageWriteOpener(linking.LinkContext{Ctx: ctx}) + if err != nil { + return err + } + if _, err = io.Copy(w, bytes.NewReader(blk.RawData())); err != nil { + return err + } + return wc(cidlink.Link{Cid: blk.Cid()}) +} + +func (lsbs *LinkSystemBlockstore) PutMany(ctx context.Context, blks []blocks.Block) error { + for _, blk := range blks { + if err := lsbs.Put(ctx, blk); err != nil { + return err + } + } + return nil +} + +func (lsbs *LinkSystemBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { + return nil, errors.New("not supported") +} + +func (lsbs *LinkSystemBlockstore) HashOnRead(enabled bool) { + lsbs.lsys.TrustedStorage = !enabled +} diff --git a/pkg/internal/itest/mocknet/mocknet.go b/pkg/internal/itest/mocknet/mocknet.go index 3b32f986..fa35a9c6 100644 --- a/pkg/internal/itest/mocknet/mocknet.go +++ b/pkg/internal/itest/mocknet/mocknet.go @@ -62,16 +62,16 @@ func NewMockRetrievalNet(ctx context.Context, t *testing.T) *MockRetrievalNet { return mrn } -func (mrn *MockRetrievalNet) AddBitswapPeers(n int) { - mrn.addPeers(mrn.testPeerGenerator.BitswapPeers(n)) +func (mrn *MockRetrievalNet) AddBitswapPeers(n int, opts ...testpeer.PeerOption) { + mrn.addPeers(mrn.testPeerGenerator.BitswapPeers(n, opts...)) } -func (mrn *MockRetrievalNet) AddGraphsyncPeers(n int) { - mrn.addPeers(mrn.testPeerGenerator.GraphsyncPeers(n)) +func (mrn *MockRetrievalNet) AddGraphsyncPeers(n int, opts ...testpeer.PeerOption) { + mrn.addPeers(mrn.testPeerGenerator.GraphsyncPeers(n, opts...)) } -func (mrn *MockRetrievalNet) AddHttpPeers(n int) { - mrn.addPeers(mrn.testPeerGenerator.HttpPeers(n)) +func (mrn *MockRetrievalNet) AddHttpPeers(n int, opts ...testpeer.PeerOption) { + mrn.addPeers(mrn.testPeerGenerator.HttpPeers(n, opts...)) } func (mrn *MockRetrievalNet) addPeers(peers []testpeer.TestPeer) { diff --git a/pkg/internal/itest/testpeer/generator.go b/pkg/internal/itest/testpeer/generator.go index 42d52d52..7d831a22 100644 --- a/pkg/internal/itest/testpeer/generator.go +++ b/pkg/internal/itest/testpeer/generator.go @@ -1,13 +1,11 @@ package testpeer import ( - "bytes" "context" "errors" "fmt" "io" "net" - "net/http" "strings" "testing" "time" @@ -16,7 +14,7 @@ import ( dtimpl "github.com/filecoin-project/go-data-transfer/v2/impl" dtnet "github.com/filecoin-project/go-data-transfer/v2/network" gstransport "github.com/filecoin-project/go-data-transfer/v2/transport/graphsync" - "github.com/filecoin-project/lassie/pkg/types" + "github.com/filecoin-project/lassie/pkg/internal/itest/linksystemutil" bsnet "github.com/ipfs/boxo/bitswap/network" "github.com/ipfs/boxo/bitswap/server" "github.com/ipfs/go-cid" @@ -29,17 +27,9 @@ import ( blockstore "github.com/ipfs/go-ipfs-blockstore" delay "github.com/ipfs/go-ipfs-delay" "github.com/ipfs/go-log/v2" - "github.com/ipfs/go-unixfsnode" - "github.com/ipld/go-car/v2" - "github.com/ipld/go-car/v2/storage" - dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime" - "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/traversal" - "github.com/ipld/go-ipld-prime/traversal/selector" routinghelpers "github.com/libp2p/go-libp2p-routing-helpers" tnet "github.com/libp2p/go-libp2p-testing/net" p2ptestutil "github.com/libp2p/go-libp2p-testing/netutil" @@ -87,60 +77,60 @@ func (g *TestPeerGenerator) Close() error { } // NextBitswap generates a new test peer with bitswap + dependencies -func (g *TestPeerGenerator) NextBitswap() TestPeer { +func (g *TestPeerGenerator) NextBitswap(opts ...PeerOption) TestPeer { g.seq++ p, err := RandTestPeerIdentity() require.NoError(g.t, err) - tp, err := NewTestBitswapPeer(g.ctx, g.mn, p, g.netOptions, g.bsOptions) + tp, err := NewTestBitswapPeer(g.ctx, g.mn, p, g.netOptions, g.bsOptions, opts...) require.NoError(g.t, err) return tp } // NextGraphsync generates a new test peer with graphsync + dependencies -func (g *TestPeerGenerator) NextGraphsync() TestPeer { +func (g *TestPeerGenerator) NextGraphsync(opts ...PeerOption) TestPeer { g.seq++ - p, err := p2ptestutil.RandTestBogusIdentity() + p, err := RandTestPeerIdentity() require.NoError(g.t, err) - tp, err := NewTestGraphsyncPeer(g.ctx, g.mn, p) + tp, err := NewTestGraphsyncPeer(g.ctx, g.mn, p, opts...) require.NoError(g.t, err) return tp } // NextHttp generates a new test peer with http + dependencies -func (g *TestPeerGenerator) NextHttp() TestPeer { +func (g *TestPeerGenerator) NextHttp(opts ...PeerOption) TestPeer { g.seq++ p, err := RandTestPeerIdentity() require.NoError(g.t, err) - tp, err := NewTestHttpPeer(g.ctx, g.mn, p, g.t) + tp, err := NewTestHttpPeer(g.ctx, g.mn, p, g.t, opts...) require.NoError(g.t, err) return tp } // BitswapPeers creates N test peers with bitswap + dependencies -func (g *TestPeerGenerator) BitswapPeers(n int) []TestPeer { +func (g *TestPeerGenerator) BitswapPeers(n int, opts ...PeerOption) []TestPeer { var instances []TestPeer for j := 0; j < n; j++ { - inst := g.NextBitswap() + inst := g.NextBitswap(opts...) instances = append(instances, inst) } return instances } // GraphsyncPeers creates N test peers with graphsync + dependencies -func (g *TestPeerGenerator) GraphsyncPeers(n int) []TestPeer { +func (g *TestPeerGenerator) GraphsyncPeers(n int, opts ...PeerOption) []TestPeer { var instances []TestPeer for j := 0; j < n; j++ { - inst := g.NextGraphsync() + inst := g.NextGraphsync(opts...) instances = append(instances, inst) } return instances } // HttpPeers creates N test peers with http + dependencies -func (g *TestPeerGenerator) HttpPeers(n int) []TestPeer { +func (g *TestPeerGenerator) HttpPeers(n int, opts ...PeerOption) []TestPeer { var instances []TestPeer for j := 0; j < n; j++ { - inst := g.NextHttp() + inst := g.NextHttp(opts...) instances = append(instances, inst) } return instances @@ -197,8 +187,15 @@ func (i TestPeer) AddrInfo() *peer.AddrInfo { // NB: It's easy make mistakes by providing the same peer ID to two different // instances. To safeguard, use the InstanceGenerator to generate instances. It's // just a much better idea. -func NewTestBitswapPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity, netOptions []bsnet.NetOpt, bsOptions []server.Option) (TestPeer, error) { - peer, _, err := newTestPeer(ctx, mn, p) +func NewTestBitswapPeer( + ctx context.Context, + mn mocknet.Mocknet, + p tnet.Identity, + netOptions []bsnet.NetOpt, + bsOptions []server.Option, + opts ...PeerOption, +) (TestPeer, error) { + peer, _, err := newTestPeer(ctx, mn, p, opts...) if err != nil { return TestPeer{}, err } @@ -215,8 +212,8 @@ func NewTestBitswapPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity return peer, nil } -func NewTestGraphsyncPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity) (TestPeer, error) { - peer, dstore, err := newTestPeer(ctx, mn, p) +func NewTestGraphsyncPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity, opts ...PeerOption) (TestPeer, error) { + peer, dstore, err := newTestPeer(ctx, mn, p, opts...) if err != nil { return TestPeer{}, err } @@ -241,8 +238,8 @@ func NewTestGraphsyncPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identi return peer, nil } -func NewTestHttpPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity, t *testing.T) (TestPeer, error) { - peer, _, err := newTestPeer(ctx, mn, p) +func NewTestHttpPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity, t *testing.T, opts ...PeerOption) (TestPeer, error) { + peer, _, err := newTestPeer(ctx, mn, p, opts...) if err != nil { return TestPeer{}, err } @@ -279,7 +276,17 @@ func NewTestHttpPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity, t return peer, nil } -func newTestPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity) (TestPeer, ds.Batching, error) { +func newTestPeer( + ctx context.Context, + mn mocknet.Mocknet, + p tnet.Identity, + opts ...PeerOption, +) (TestPeer, ds.Batching, error) { + cfg := peerConfig{} + for _, opt := range opts { + opt(&cfg) + } + bsdelay := delay.Fixed(0) client, err := mn.AddPeer(p.PrivateKey(), p.Address()) @@ -287,20 +294,24 @@ func newTestPeer(ctx context.Context, mn mocknet.Mocknet, p tnet.Identity) (Test panic(err.Error()) } - dstore := ds_sync.MutexWrap(ds.NewMapDatastore()) + baseStore := ds.NewMapDatastore() + dstore := ds_sync.MutexWrap(baseStore) dstoreDelayed := delayed.New(dstore, bsdelay) - bstore, err := blockstore.CachedBlockstore(ctx, - blockstore.NewBlockstore(dstoreDelayed), - blockstore.DefaultCacheOpts()) - if err != nil { - return TestPeer{}, nil, err + if cfg.bstore == nil { + var err error + cfg.bstore, err = blockstore.CachedBlockstore(ctx, + blockstore.NewBlockstore(dstoreDelayed), + blockstore.DefaultCacheOpts()) + if err != nil { + return TestPeer{}, nil, err + } } - lsys := storeutil.LinkSystemForBlockstore(bstore) + lsys := storeutil.LinkSystemForBlockstore(cfg.bstore) tp := TestPeer{ Host: client, ID: p.ID(), - blockstore: bstore, + blockstore: cfg.bstore, blockstoreDelay: bsdelay, LinkSystem: &lsys, Cids: make(map[cid.Cid]struct{}), @@ -342,126 +353,6 @@ func StartAndWaitForReady(ctx context.Context, manager datatransfer.Manager) err } } -func MockIpfsHandler(ctx context.Context, lsys linking.LinkSystem) func(http.ResponseWriter, *http.Request) { - return func(res http.ResponseWriter, req *http.Request) { - urlPath := strings.Split(req.URL.Path, "/")[1:] - - // validate CID path parameter - cidStr := urlPath[1] - rootCid, err := cid.Parse(cidStr) - if err != nil { - http.Error(res, fmt.Sprintf("Failed to parse CID path parameter: %s", cidStr), http.StatusBadRequest) - return - } - - // Grab unixfs path if it exists - unixfsPath := "" - if len(urlPath) > 2 { - unixfsPath = "/" + strings.Join(urlPath[2:], "/") - } - - acceptTypes := strings.Split(req.Header.Get("Accept"), ",") - includeDupes := false - for _, acceptType := range acceptTypes { - typeParts := strings.Split(acceptType, ";") - if typeParts[0] == "application/vnd.ipld.car" { - for _, nextPart := range typeParts[1:] { - pair := strings.Split(nextPart, "=") - if len(pair) == 2 { - attr := strings.TrimSpace(pair[0]) - value := strings.TrimSpace(pair[1]) - if attr == "dups" && value == "y" { - includeDupes = true - } - } - } - } - } - - // We're always providing the dag-scope parameter, so add a failure case if we stop - // providing it in the future - if !req.URL.Query().Has("dag-scope") { - http.Error(res, "Missing dag-scope parameter", http.StatusBadRequest) - return - } - - // Parse car scope and use it to get selector - var dagScope types.DagScope - switch req.URL.Query().Get("dag-scope") { - case "all": - dagScope = types.DagScopeAll - case "entity": - dagScope = types.DagScopeEntity - case "block": - dagScope = types.DagScopeBlock - default: - http.Error(res, fmt.Sprintf("Invalid dag-scope parameter: %s", req.URL.Query().Get("dag-scope")), http.StatusBadRequest) - return - } - - selNode := unixfsnode.UnixFSPathSelectorBuilder(unixfsPath, dagScope.TerminalSelectorSpec(), false) - sel, err := selector.CompileSelector(selNode) - if err != nil { - http.Error(res, fmt.Sprintf("Failed to compile selector from dag-scope: %v", err), http.StatusInternalServerError) - return - } - - // Write to response writer - carWriter, err := storage.NewWritable(res, []cid.Cid{rootCid}, car.WriteAsCarV1(true), car.AllowDuplicatePuts(includeDupes)) - if err != nil { - http.Error(res, fmt.Sprintf("Failed to create car writer: %v", err), http.StatusInternalServerError) - return - } - - // Extend the StorageReadOpener func to write to the carWriter - originalSRO := lsys.StorageReadOpener - lsys.StorageReadOpener = func(lc linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { - r, err := originalSRO(lc, lnk) - if err != nil { - return nil, err - } - byts, err := io.ReadAll(r) - if err != nil { - return nil, err - } - err = carWriter.Put(ctx, lnk.(cidlink.Link).Cid.KeyString(), byts) - if err != nil { - return nil, err - } - - return bytes.NewReader(byts), nil - } - - protoChooser := dagpb.AddSupportToChooser(basicnode.Chooser) - lnk := cidlink.Link{Cid: rootCid} - lnkCtx := linking.LinkContext{} - proto, err := protoChooser(lnk, lnkCtx) - if err != nil { - http.Error(res, fmt.Sprintf("Failed to choose prototype node: %s", cidStr), http.StatusBadRequest) - return - } - - rootNode, err := lsys.Load(lnkCtx, lnk, proto) - if err != nil { - http.Error(res, fmt.Sprintf("Failed to load root cid into link system: %v", err), http.StatusInternalServerError) - return - } - - cfg := &traversal.Config{ - Ctx: ctx, - LinkSystem: lsys, - LinkTargetNodePrototypeChooser: protoChooser, - } - progress := traversal.Progress{Cfg: cfg} - - err = progress.WalkAdv(rootNode, sel, visitNoop) - if err != nil { - // if we loaded the first block, we can't write headers any more - return - } - } -} - // RandTestPeerIdentity is a wrapper around // github.com/libp2p/go-libp2p-testing/netutil/RandTestBogusIdentity that // ensures the returned identity has an available port. The identity generated @@ -486,4 +377,14 @@ func RandTestPeerIdentity() (tnet.Identity, error) { return nil, errors.New("failed to find an available port") } -func visitNoop(p traversal.Progress, n datamodel.Node, vr traversal.VisitReason) error { return nil } +type peerConfig struct { + bstore blockstore.Blockstore +} + +type PeerOption func(*peerConfig) + +func WithLinkSystem(lsys linking.LinkSystem) PeerOption { + return func(pc *peerConfig) { + pc.bstore = linksystemutil.NewLinkSystemBlockstore(lsys) + } +} diff --git a/pkg/internal/itest/testpeer/peerhttpserver.go b/pkg/internal/itest/testpeer/peerhttpserver.go index 54588c2e..a439d408 100644 --- a/pkg/internal/itest/testpeer/peerhttpserver.go +++ b/pkg/internal/itest/testpeer/peerhttpserver.go @@ -1,11 +1,26 @@ package testpeer import ( + "bytes" "context" "fmt" + "io" "net" "net/http" + "strings" + "github.com/filecoin-project/lassie/pkg/types" + "github.com/ipfs/go-cid" + "github.com/ipfs/go-unixfsnode" + "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/storage" + dagpb "github.com/ipld/go-codec-dagpb" + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/linking" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/traversal" + "github.com/ipld/go-ipld-prime/traversal/selector" servertiming "github.com/mitchellh/go-server-timing" ) @@ -65,3 +80,128 @@ func (s *TestPeerHttpServer) Close() error { s.cancel() return s.server.Shutdown(context.Background()) } + +func MockIpfsHandler(ctx context.Context, lsys linking.LinkSystem) func(http.ResponseWriter, *http.Request) { + return func(res http.ResponseWriter, req *http.Request) { + urlPath := strings.Split(req.URL.Path, "/")[1:] + + // validate CID path parameter + cidStr := urlPath[1] + rootCid, err := cid.Parse(cidStr) + if err != nil { + http.Error(res, fmt.Sprintf("Failed to parse CID path parameter: %s", cidStr), http.StatusBadRequest) + return + } + + // Grab unixfs path if it exists + unixfsPath := "" + if len(urlPath) > 2 { + unixfsPath = "/" + strings.Join(urlPath[2:], "/") + } + + acceptTypes := strings.Split(req.Header.Get("Accept"), ",") + includeDupes := false + for _, acceptType := range acceptTypes { + typeParts := strings.Split(acceptType, ";") + if typeParts[0] == "application/vnd.ipld.car" { + for _, nextPart := range typeParts[1:] { + pair := strings.Split(nextPart, "=") + if len(pair) == 2 { + attr := strings.TrimSpace(pair[0]) + value := strings.TrimSpace(pair[1]) + if attr == "dups" && value == "y" { + includeDupes = true + } + } + } + } + } + + // We're always providing the dag-scope parameter, so add a failure case if we stop + // providing it in the future + if !req.URL.Query().Has("dag-scope") { + http.Error(res, "Missing dag-scope parameter", http.StatusBadRequest) + return + } + + // Parse car scope and use it to get selector + var dagScope types.DagScope + switch req.URL.Query().Get("dag-scope") { + case "all": + dagScope = types.DagScopeAll + case "entity": + dagScope = types.DagScopeEntity + case "block": + dagScope = types.DagScopeBlock + default: + http.Error(res, fmt.Sprintf("Invalid dag-scope parameter: %s", req.URL.Query().Get("dag-scope")), http.StatusBadRequest) + return + } + var byteRange *types.ByteRange + if req.URL.Query().Get("entity-bytes") != "" { + br, err := types.ParseByteRange(req.URL.Query().Get("entity-bytes")) + if err != nil { + http.Error(res, fmt.Sprintf("Invalid entity-bytes parameter: %s", req.URL.Query().Get("entity-bytes")), http.StatusBadRequest) + return + } + byteRange = &br + } + + sel, err := selector.CompileSelector(types.PathScopeSelector(unixfsPath, dagScope, byteRange)) + if err != nil { + http.Error(res, fmt.Sprintf("Failed to compile selector from dag-scope: %v", err), http.StatusInternalServerError) + return + } + + // Write to response writer + carWriter, err := storage.NewWritable(res, []cid.Cid{rootCid}, car.WriteAsCarV1(true), car.AllowDuplicatePuts(includeDupes)) + if err != nil { + http.Error(res, fmt.Sprintf("Failed to create car writer: %v", err), http.StatusInternalServerError) + return + } + + // Extend the StorageReadOpener func to write to the carWriter + originalSRO := lsys.StorageReadOpener + lsys.StorageReadOpener = func(lc linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { + r, err := originalSRO(lc, lnk) + if err != nil { + return nil, err + } + byts, err := io.ReadAll(r) + if err != nil { + return nil, err + } + err = carWriter.Put(ctx, lnk.(cidlink.Link).Cid.KeyString(), byts) + if err != nil { + return nil, err + } + + return bytes.NewReader(byts), nil + } + + protoChooser := dagpb.AddSupportToChooser(basicnode.Chooser) + lnk := cidlink.Link{Cid: rootCid} + lnkCtx := linking.LinkContext{} + proto, err := protoChooser(lnk, lnkCtx) + if err != nil { + http.Error(res, fmt.Sprintf("Failed to choose prototype node: %s", cidStr), http.StatusBadRequest) + return + } + + rootNode, err := lsys.Load(lnkCtx, lnk, proto) + if err != nil { + http.Error(res, fmt.Sprintf("Failed to load root cid into link system: %v", err), http.StatusInternalServerError) + return + } + + cfg := &traversal.Config{ + Ctx: ctx, + LinkSystem: lsys, + LinkTargetNodePrototypeChooser: protoChooser, + } + progress := traversal.Progress{Cfg: cfg} + + _ = progress.WalkMatching(rootNode, sel, unixfsnode.BytesConsumingMatcher) + // if we loaded the first block, we can't write headers any more so don't bother + } +} diff --git a/pkg/internal/itest/trustless_fetch_test.go b/pkg/internal/itest/trustless_fetch_test.go new file mode 100644 index 00000000..67722c4b --- /dev/null +++ b/pkg/internal/itest/trustless_fetch_test.go @@ -0,0 +1,147 @@ +package itest + +import ( + "context" + "fmt" + "io" + "net/http" + "strings" + "sync" + "testing" + "time" + + datatransfer "github.com/filecoin-project/go-data-transfer/v2" + "github.com/filecoin-project/lassie/pkg/internal/itest/mocknet" + "github.com/filecoin-project/lassie/pkg/internal/itest/testpeer" + "github.com/filecoin-project/lassie/pkg/lassie" + httpserver "github.com/filecoin-project/lassie/pkg/server/http" + "github.com/google/uuid" + "github.com/ipfs/go-unixfsnode" + "github.com/ipld/go-car/v2" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + trustlesspathing "github.com/ipld/ipld/specs/pkg-go/trustless-pathing" + "github.com/stretchr/testify/require" +) + +func TestTrustlessUnixfsFetch(t *testing.T) { + req := require.New(t) + + testCases, err := trustlesspathing.Unixfs20mVarietyCases() + req.NoError(err) + storage, closer, err := trustlesspathing.Unixfs20mVarietyReadableStorage() + req.NoError(err) + defer closer.Close() + + lsys := cidlink.DefaultLinkSystem() + lsys.TrustedStorage = true + unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) + lsys.SetReadStorage(storage) + + for _, tc := range testCases { + for _, proto := range []string{"http", "graphsync", "bitswap"} { + t.Run(tc.Name+"/"+proto, func(t *testing.T) { + req := require.New(t) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + t.Logf("query=%s, blocks=%d", tc.AsQuery(), len(tc.ExpectedCids)) + + var finishedChan chan []datatransfer.Event + mrn := mocknet.NewMockRetrievalNet(ctx, t) + switch proto { + case "http": + mrn.AddHttpPeers(1, testpeer.WithLinkSystem(lsys)) + case "graphsync": + mrn.AddGraphsyncPeers(1, testpeer.WithLinkSystem(lsys)) + finishedChan = mocknet.SetupRetrieval(t, mrn.Remotes[0]) + case "bitswap": + mrn.AddBitswapPeers(1, testpeer.WithLinkSystem(lsys)) + } + + require.NoError(t, mrn.MN.LinkAll()) + mrn.Remotes[0].Cids[tc.Root] = struct{}{} + + lassie, err := lassie.NewLassie( + ctx, + lassie.WithProviderTimeout(20*time.Second), + lassie.WithHost(mrn.Self), + lassie.WithFinder(mrn.Finder), + ) + req.NoError(err) + cfg := httpserver.HttpServerConfig{Address: "127.0.0.1", Port: 0, TempDir: t.TempDir()} + httpServer, err := httpserver.NewHttpServer(ctx, lassie, cfg) + req.NoError(err) + serverError := make(chan error, 1) + go func() { + serverError <- httpServer.Start() + }() + responseChan := make(chan *http.Response, 1) + go func() { + // Make a request for our CID and read the complete CAR bytes + addr := fmt.Sprintf("http://%s%s", httpServer.Addr(), tc.AsQuery()) + getReq, err := http.NewRequest("GET", addr, nil) + req.NoError(err) + getReq.Header.Add("Accept", "application/vnd.ipld.car") + t.Log("Fetching", getReq.URL.String()) + resp, err := http.DefaultClient.Do(getReq) + req.NoError(err) + responseChan <- resp + }() + var resp *http.Response + select { + case resp = <-responseChan: + case <-ctx.Done(): + req.FailNow("Did not receive responses") + } + if finishedChan != nil { + // for graphsync + var wg sync.WaitGroup + wg.Add(1) + go func() { + mocknet.WaitForFinish(ctx, t, finishedChan, 1*time.Second) + wg.Done() + }() + wg.Wait() + } + if resp.StatusCode != http.StatusOK { + body, err := io.ReadAll(resp.Body) + req.NoError(err) + req.Failf("200 response code not received", "got code: %d, body: %s", resp.StatusCode, string(body)) + } + req.Equal(fmt.Sprintf(`attachment; filename="%s.car"`, tc.Root.String()), resp.Header.Get("Content-Disposition")) + req.Equal("none", resp.Header.Get("Accept-Ranges")) + req.Equal("public, max-age=29030400, immutable", resp.Header.Get("Cache-Control")) + req.Equal("application/vnd.ipld.car; version=1", resp.Header.Get("Content-Type")) + req.Equal("nosniff", resp.Header.Get("X-Content-Type-Options")) + etagStart := fmt.Sprintf(`"%s.car.`, tc.Root.String()) + etagGot := resp.Header.Get("ETag") + req.True(strings.HasPrefix(etagGot, etagStart), "ETag should start with [%s], got [%s]", etagStart, etagGot) + req.Equal(`"`, etagGot[len(etagGot)-1:], "ETag should end with a quote") + req.Equal(fmt.Sprintf("/ipfs/%s%s", tc.Root.String(), tc.Path), resp.Header.Get("X-Ipfs-Path")) + requestId := resp.Header.Get("X-Trace-Id") + require.NotEmpty(t, requestId) + _, err = uuid.Parse(requestId) + req.NoError(err) + + rdr, err := car.NewBlockReader(resp.Body) + req.NoError(err) + req.Len(rdr.Roots, 1) + req.Equal(tc.Root.String(), rdr.Roots[0].String()) + for ii := 0; ; ii++ { + blk, err := rdr.Next() + if err == io.EOF { + if ii != len(tc.ExpectedCids) { + req.FailNowf("unexpected EOF", "expected %d blocks, got %d", len(tc.ExpectedCids), ii) + } + break + } + req.NoError(err) + if ii >= len(tc.ExpectedCids) { + req.FailNowf("unexpected block", "got block %d, expected %d", ii, len(tc.ExpectedCids)) + } + req.Equal(tc.ExpectedCids[ii].String(), blk.Cid().String(), "unexpected block #%d", ii) + } + }) + } + } +} diff --git a/pkg/internal/testutil/mockclient.go b/pkg/internal/testutil/mockclient.go index b9824fb0..d25166b0 100644 --- a/pkg/internal/testutil/mockclient.go +++ b/pkg/internal/testutil/mockclient.go @@ -12,6 +12,7 @@ import ( retrievaltypes "github.com/filecoin-project/go-retrieval-types" "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-cid" + "github.com/ipfs/go-graphsync" "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/datamodel" "github.com/libp2p/go-libp2p/core/peer" @@ -24,9 +25,10 @@ type DelayedConnectReturn struct { } type DelayedClientReturn struct { - ResultStats *types.RetrievalStats - ResultErr error - Delay time.Duration + ResultStats *types.RetrievalStats + ProgressPaths []string + ResultErr error + Delay time.Duration } type ClientRetrievalRequest struct { @@ -182,6 +184,7 @@ func (mc *MockClient) RetrieveFromPeer( proposal *retrievaltypes.DealProposal, selector ipld.Node, maxBlocks uint64, + progressCallback func(graphsync.ResponseProgress), eventsCallback datatransfer.Subscriber, gracefulShutdownRequested <-chan struct{}, ) (*types.RetrievalStats, error) { @@ -210,6 +213,11 @@ func (mc *MockClient) RetrieveFromPeer( return nil, context.Canceled case <-timer.C: } + for _, path := range drr.ProgressPaths { + progressCallback(graphsync.ResponseProgress{ + Path: datamodel.ParsePath(path), + }) + } eventsCallback(datatransfer.Event{Code: datatransfer.Open}, nil) if drr.ResultStats != nil { acceptedResponse := &retrievaltypes.DealResponse{ diff --git a/pkg/internal/testutil/toblocks.go b/pkg/internal/testutil/toblocks.go index bbc43927..81061f8e 100644 --- a/pkg/internal/testutil/toblocks.go +++ b/pkg/internal/testutil/toblocks.go @@ -53,8 +53,7 @@ func ToBlocks(t *testing.T, lsys linking.LinkSystem, root cid.Cid, selNode datam LinkTargetNodePrototypeChooser: dagpb.AddSupportToChooser(basicnode.Chooser), }, } - vf := func(p traversal.Progress, n datamodel.Node, vr traversal.VisitReason) error { return nil } - err = prog.WalkAdv(rootNode, sel, vf) + err = prog.WalkMatching(rootNode, sel, unixfsnode.BytesConsumingMatcher) require.NoError(t, err) return traversedBlocks diff --git a/pkg/lassie/lassie.go b/pkg/lassie/lassie.go index ca0fed8f..573969c2 100644 --- a/pkg/lassie/lassie.go +++ b/pkg/lassie/lassie.go @@ -6,9 +6,9 @@ import ( "time" "github.com/filecoin-project/lassie/pkg/indexerlookup" - "github.com/filecoin-project/lassie/pkg/net/client" "github.com/filecoin-project/lassie/pkg/net/host" "github.com/filecoin-project/lassie/pkg/retriever" + "github.com/filecoin-project/lassie/pkg/retriever/graphsync/client" "github.com/filecoin-project/lassie/pkg/session" "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-datastore" diff --git a/pkg/retriever/bitswapretriever.go b/pkg/retriever/bitswapretriever.go index 132fafba..fe9e65f9 100644 --- a/pkg/retriever/bitswapretriever.go +++ b/pkg/retriever/bitswapretriever.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "math" "sync/atomic" "time" @@ -15,18 +14,15 @@ import ( "github.com/filecoin-project/lassie/pkg/events" "github.com/filecoin-project/lassie/pkg/retriever/bitswaphelpers" "github.com/filecoin-project/lassie/pkg/types" + "github.com/filecoin-project/lassie/pkg/verifiedcar" "github.com/ipfs/boxo/bitswap/client" "github.com/ipfs/boxo/bitswap/network" "github.com/ipfs/boxo/blockservice" "github.com/ipfs/go-cid" - dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/linking/preload" - "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/traversal" - "github.com/ipld/go-ipld-prime/traversal/selector" "github.com/ipni/go-libipni/metadata" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" @@ -225,15 +221,13 @@ func (br *bitswapRetrieval) RetrieveFromAsyncCandidates(ayncCandidates types.Inb traversalLinkSys.StorageReadOpener = loader } - // run the retrieval - err = easyTraverse( - ctx, - cidlink.Link{Cid: br.request.Cid}, - selector, - traversalLinkSys, - preloader, - br.request.MaxBlocks, - ) + err = verifiedcar.Config{ + Root: br.request.Cid, + Selector: selector, + ExpectPath: datamodel.ParsePath(br.request.Path), + MaxBlocks: br.request.MaxBlocks, + }.Traverse(ctx, traversalLinkSys, preloader) + if storage != nil { storage.Stop() } @@ -301,73 +295,3 @@ func loaderForSession(retrievalID types.RetrievalID, inProgressCids InProgressCi return bytes.NewReader(blk.RawData()), nil } } - -func noopVisitor(prog traversal.Progress, n datamodel.Node, reason traversal.VisitReason) error { - return nil -} - -func easyTraverse( - ctx context.Context, - root datamodel.Link, - traverseSelector datamodel.Node, - lsys linking.LinkSystem, - preloader preload.Loader, - maxBlocks uint64, -) error { - - lsys, ecr := newErrorCapturingReader(lsys) - protoChooser := dagpb.AddSupportToChooser(basicnode.Chooser) - - // retrieve first node - prototype, err := protoChooser(root, linking.LinkContext{Ctx: ctx}) - if err != nil { - return err - } - node, err := lsys.Load(linking.LinkContext{Ctx: ctx}, root, prototype) - if err != nil { - return err - } - - progress := traversal.Progress{ - Cfg: &traversal.Config{ - Ctx: ctx, - LinkSystem: lsys, - LinkTargetNodePrototypeChooser: protoChooser, - Preloader: preloader, - }, - } - if maxBlocks > 0 { - progress.Budget = &traversal.Budget{ - LinkBudget: int64(maxBlocks) - 1, // first block is already loaded - NodeBudget: math.MaxInt64, - } - } - progress.LastBlock.Link = root - compiledSelector, err := selector.ParseSelector(traverseSelector) - if err != nil { - return err - } - if err := progress.WalkAdv(node, compiledSelector, noopVisitor); err != nil { - return err - } - return ecr.Error -} - -type errorCapturingReader struct { - sro linking.BlockReadOpener - Error error -} - -func newErrorCapturingReader(lsys linking.LinkSystem) (linking.LinkSystem, *errorCapturingReader) { - ecr := &errorCapturingReader{sro: lsys.StorageReadOpener} - lsys.StorageReadOpener = ecr.StorageReadOpener - return lsys, ecr -} - -func (ecr *errorCapturingReader) StorageReadOpener(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { - r, err := ecr.sro(lc, l) - if err != nil { - ecr.Error = err - } - return r, err -} diff --git a/pkg/net/client/client.go b/pkg/retriever/graphsync/client/client.go similarity index 92% rename from pkg/net/client/client.go rename to pkg/retriever/graphsync/client/client.go index 4cc0c18a..9d6eb16a 100644 --- a/pkg/net/client/client.go +++ b/pkg/retriever/graphsync/client/client.go @@ -25,8 +25,9 @@ import ( "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-datastore" + "github.com/ipfs/go-graphsync" - graphsync "github.com/ipfs/go-graphsync/impl" + gsimpl "github.com/ipfs/go-graphsync/impl" gsnetwork "github.com/ipfs/go-graphsync/network" "github.com/ipfs/go-log/v2" @@ -70,7 +71,7 @@ type RetrievalClient struct { type Config struct { ChannelMonitorConfig dtchannelmonitor.Config Datastore datastore.Batching - GraphsyncOpts []graphsync.Option + GraphsyncOpts []gsimpl.Option Host host.Host RetrievalConfigurer datatransfer.TransportConfigurer } @@ -90,16 +91,16 @@ func NewClient(ctx context.Context, datastore datastore.Batching, host host.Host //OnRestartComplete func(id datatransfer.ChannelID) }, Datastore: datastore, - GraphsyncOpts: []graphsync.Option{ - graphsync.MaxInProgressIncomingRequests(maxInProgressIncomingRequests), - graphsync.MaxInProgressOutgoingRequests(maxInProgressOutgoingRequests), - graphsync.MaxMemoryResponder(maxMemoryResponder), - graphsync.MaxMemoryPerPeerResponder(maxMemoryPerPeerResponder), - graphsync.MaxInProgressIncomingRequestsPerPeer(maxInProgressIncomingRequestsPerPeer), - graphsync.MessageSendRetries(messageSendRetries), - graphsync.SendMessageTimeout(sendMessageTimeout), - graphsync.MaxLinksPerIncomingRequests(maxTraversalLinks), - graphsync.MaxLinksPerOutgoingRequests(maxTraversalLinks), + GraphsyncOpts: []gsimpl.Option{ + gsimpl.MaxInProgressIncomingRequests(maxInProgressIncomingRequests), + gsimpl.MaxInProgressOutgoingRequests(maxInProgressOutgoingRequests), + gsimpl.MaxMemoryResponder(maxMemoryResponder), + gsimpl.MaxMemoryPerPeerResponder(maxMemoryPerPeerResponder), + gsimpl.MaxInProgressIncomingRequestsPerPeer(maxInProgressIncomingRequestsPerPeer), + gsimpl.MessageSendRetries(messageSendRetries), + gsimpl.SendMessageTimeout(sendMessageTimeout), + gsimpl.MaxLinksPerIncomingRequests(maxTraversalLinks), + gsimpl.MaxLinksPerOutgoingRequests(maxTraversalLinks), }, Host: host, } @@ -113,12 +114,11 @@ func NewClient(ctx context.Context, datastore datastore.Batching, host host.Host // Creates a new RetrievalClient with the given Config func NewClientWithConfig(ctx context.Context, cfg *Config) (*RetrievalClient, error) { - - graphSync := graphsync.New(ctx, + graphSync := gsimpl.New(ctx, gsnetwork.NewFromLibp2pHost(cfg.Host), cidlink.DefaultLinkSystem(), cfg.GraphsyncOpts..., - ).(*graphsync.GraphSync) + ).(*gsimpl.GraphSync) dtNetwork := dtnetwork.NewFromLibp2pHost(cfg.Host) dtTransport := dttransport.NewTransport(cfg.Host.ID(), graphSync) @@ -183,6 +183,7 @@ func (rc *RetrievalClient) RetrieveFromPeer( proposal *retrievaltypes.DealProposal, sel ipld.Node, maxBlocks uint64, + progressCallback func(graphsync.ResponseProgress), eventsCallback datatransfer.Subscriber, gracefulShutdownRequested <-chan struct{}, ) (*types.RetrievalStats, error) { @@ -313,6 +314,7 @@ func (rc *RetrievalClient) RetrieveFromPeer( datatransfer.WithTransportOptions( dttransport.UseStore(linkSystem), dttransport.MaxLinks(maxBlocks), + dttransport.WithResponseProgressListener(progressCallback), ), ) if err != nil { @@ -333,7 +335,6 @@ awaitfinished: if err != nil { return nil, fmt.Errorf("data transfer failed: %w", err) } - logger.Debugf("data transfer for retrieval complete") break awaitfinished case <-gracefulShutdownRequested: diff --git a/pkg/net/client/client_test.go b/pkg/retriever/graphsync/client/client_test.go similarity index 98% rename from pkg/net/client/client_test.go rename to pkg/retriever/graphsync/client/client_test.go index 5fd8a5cd..9b245115 100644 --- a/pkg/net/client/client_test.go +++ b/pkg/retriever/graphsync/client/client_test.go @@ -10,6 +10,7 @@ import ( retrievaltypes "github.com/filecoin-project/go-retrieval-types" "github.com/filecoin-project/go-state-types/big" "github.com/ipfs/go-cid" + "github.com/ipfs/go-graphsync" "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/datamodel" cidlink "github.com/ipld/go-ipld-prime/linking/cid" @@ -51,6 +52,7 @@ func TestClient(t *testing.T) { }, } eventsCb := func(event datatransfer.Event, channelState datatransfer.ChannelState) {} + progressCb := func(prog graphsync.ResponseProgress) {} gracefulShutdownRequested := make(chan struct{}) stats, err := client.RetrieveFromPeer( @@ -60,6 +62,7 @@ func TestClient(t *testing.T) { proposal, selector, 0, + progressCb, eventsCb, gracefulShutdownRequested, ) @@ -76,7 +79,6 @@ func TestClient(t *testing.T) { gotVoucher, err := retrievaltypes.BindnodeRegistry.TypeFromNode(voucher.Voucher, (*retrievaltypes.DealProposal)(nil)) require.NoError(t, err) require.Equal(t, proposal, gotVoucher) - require.Equal(t, proposal.PayloadCID, gotLoadFor) } @@ -99,6 +101,7 @@ func TestClient_BadSelector(t *testing.T) { }, } eventsCb := func(event datatransfer.Event, channelState datatransfer.ChannelState) {} + progressCb := func(prog graphsync.ResponseProgress) {} gracefulShutdownRequested := make(chan struct{}) stats, err := client.RetrieveFromPeer( @@ -108,6 +111,7 @@ func TestClient_BadSelector(t *testing.T) { proposal, selector, 0, + progressCb, eventsCb, gracefulShutdownRequested, ) diff --git a/pkg/retriever/graphsyncretriever.go b/pkg/retriever/graphsyncretriever.go index dceb614a..f4f94178 100644 --- a/pkg/retriever/graphsyncretriever.go +++ b/pkg/retriever/graphsyncretriever.go @@ -12,9 +12,12 @@ import ( "github.com/filecoin-project/go-state-types/big" "github.com/filecoin-project/lassie/pkg/events" "github.com/filecoin-project/lassie/pkg/types" + "github.com/filecoin-project/lassie/pkg/verifiedcar" "github.com/ipfs/go-cid" + "github.com/ipfs/go-graphsync" "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/codec/dagjson" + "github.com/ipld/go-ipld-prime/datamodel" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" "github.com/ipni/go-libipni/metadata" "github.com/libp2p/go-libp2p/core/peer" @@ -36,6 +39,7 @@ type GraphsyncClient interface { proposal *retrievaltypes.DealProposal, selector ipld.Node, maxLinks uint64, + progressCallback func(graphsync.ResponseProgress), eventsCallback datatransfer.Subscriber, gracefulShutdownRequested <-chan struct{}, ) (*types.RetrievalStats, error) @@ -214,13 +218,19 @@ func (pg *ProtocolGraphsync) Retrieve( } } + var lastPath datamodel.Path + progressCallback := func(progress graphsync.ResponseProgress) { + lastPath = progress.Path + } + stats, err := pg.Client.RetrieveFromPeer( retrieveCtx, retrieval.request.LinkSystem, candidate.MinerPeer.ID, proposal, selector, - uint64(retrieval.request.MaxBlocks), + retrieval.request.MaxBlocks, + progressCallback, eventsSubscriber, gracefulShutdownChan, ) @@ -245,5 +255,10 @@ func (pg *ProtocolGraphsync) Retrieve( if err != nil { return nil, fmt.Errorf("%w: %v", ErrRetrievalFailed, err) } + + if err := verifiedcar.CheckTraversalPath(datamodel.ParsePath(retrieval.request.Path), lastPath); err != nil { + return nil, err + } + return stats, nil } diff --git a/pkg/retriever/graphsyncretriever_test.go b/pkg/retriever/graphsyncretriever_test.go index 91d7c30b..82ac5c31 100644 --- a/pkg/retriever/graphsyncretriever_test.go +++ b/pkg/retriever/graphsyncretriever_test.go @@ -23,7 +23,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestRetrievalRacing(t *testing.T) { +func TestGraphsyncRetrievalRacing(t *testing.T) { retrievalID := types.RetrievalID(uuid.New()) startTime := time.Now().Add(time.Hour) initialPause := 10 * time.Millisecond @@ -678,22 +678,7 @@ func TestRetrievalRacing(t *testing.T) { candidates = append(candidates, types.NewRetrievalCandidate(peer.ID(p), nil, cid.Undef, protocol)) } - // we're testing the actual Session implementation, but we also want - // to observe, so MockSession WithActual lets us do that. - scfg := session.DefaultConfig(). - WithDefaultProviderConfig(session.ProviderConfig{ - RetrievalTimeout: time.Second, - }). - WithConnectTimeAlpha(0.0). // only use the last connect time - WithoutRandomness() - _session := session.NewSession(scfg, true) - session := testutil.NewMockSession(ctx) - session.WithActual(_session) - // register the retrieval so we don't get any "not found" errors out - // of the session - session.RegisterRetrieval(retrievalID, cid.Undef, basicnode.NewString("r")) - session.AddToRetrieval(retrievalID, []peer.ID{peerFoo, peerBar, peerBaz, peerBang}) - + session := setupTestSession(ctx, retrievalID, []peer.ID{"foo", "bar", "baz", "bang"}) cfg := retriever.NewGraphsyncRetrieverWithConfig(session, mockClient, clock, initialPause, true) rv := testutil.RetrievalVerifier{ @@ -732,7 +717,7 @@ func TestRetrievalRacing(t *testing.T) { } // run two retrievals simultaneously -func TestMultipleRetrievals(t *testing.T) { +func TestGraphsyncMultipleRetrievals(t *testing.T) { retrievalID1 := types.RetrievalID(uuid.New()) retrievalID2 := types.RetrievalID(uuid.New()) peerFoo := peer.ID("foo") @@ -861,13 +846,7 @@ func TestMultipleRetrievals(t *testing.T) { // we're testing the actual Session implementation, but we also want // to observe, so MockSession WithActual lets us do that. - scfg := session.DefaultConfig(). - WithDefaultProviderConfig(session.ProviderConfig{ - RetrievalTimeout: time.Second, - }). - WithConnectTimeAlpha(0.0). // only use the last connect time - WithoutRandomness() - _session := session.NewSession(scfg, true) + _session := session.NewSession(makeTestSession(), true) session := testutil.NewMockSession(ctx) session.WithActual(_session) // register the retrieval so we don't get any "not found" errors out @@ -918,7 +897,7 @@ func TestMultipleRetrievals(t *testing.T) { require.Equal(t, mockClient.GetRetrievalReturns()["bing"].ResultStats, stats) } -func TestRetrievalSelector(t *testing.T) { +func TestGraphsyncRetrievalSelector(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() retrievalID := types.RetrievalID(uuid.New()) @@ -931,13 +910,7 @@ func TestRetrievalSelector(t *testing.T) { clock.New(), ) - scfg := session.DefaultConfig(). - WithDefaultProviderConfig(session.ProviderConfig{ - RetrievalTimeout: time.Second, - }). - WithConnectTimeAlpha(0.0). // only use the last connect time - WithoutRandomness() - session := session.NewSession(scfg, true) + session := setupTestSession(ctx, retrievalID, []peer.ID{peerFoo, peerBar}) cfg := retriever.NewGraphsyncRetriever(session, mockClient) selector := selectorparse.CommonSelector_MatchPoint @@ -959,7 +932,69 @@ func TestRetrievalSelector(t *testing.T) { require.Same(t, selector, rr.Selector) } -func TestDuplicateRetreivals(t *testing.T) { +func TestGraphsyncRetrievalPathProgress(t *testing.T) { + expectPath := "/path/to/thing" + for _, tc := range []struct { + name string + paths []string + expectPass bool + unexpectedSegment bool + }{ + {"full", []string{"/path", "/path/to", "/path/to/thing"}, true, false}, + {"more", []string{"/path", "/path/to", "/path/to/thing", "/path/to/thing/more!"}, true, false}, + {"less", []string{"/path", "/path/to"}, false, false}, + {"single segment", []string{"/path"}, false, false}, + {"empty", []string{}, false, false}, + {"going awry", []string{"/path", "/path/to/other"}, false, true}, + {"completely wrong", []string{"/wot"}, false, true}, + } { + t.Run(tc.name, func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + retrievalID := types.RetrievalID(uuid.New()) + peerFoo := peer.ID("foo") + peerBar := peer.ID("bar") + cid1 := cid.MustParse("bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi") + mockClient := testutil.NewMockClient( + map[string]testutil.DelayedConnectReturn{"foo": {Err: nil, Delay: 0}}, + map[string]testutil.DelayedClientReturn{"foo": { + ResultStats: &types.RetrievalStats{StorageProviderId: peerBar, Size: 2}, + Delay: 0, + ProgressPaths: tc.paths, + }}, + clock.New(), + ) + + session := setupTestSession(ctx, retrievalID, []peer.ID{peerFoo, peerBar}) + cfg := retriever.NewGraphsyncRetriever(session, mockClient) + + retrieval := cfg.Retrieve(context.Background(), types.RetrievalRequest{ + Cid: cid1, + RetrievalID: retrievalID, + LinkSystem: cidlink.DefaultLinkSystem(), + Path: expectPath, + }, nil) + stats, err := retrieval.RetrieveFromAsyncCandidates(makeAsyncCandidates(t, []types.RetrievalCandidate{types.NewRetrievalCandidate(peerFoo, nil, cid.Undef, &metadata.GraphsyncFilecoinV1{})})) + if tc.expectPass { + require.NoError(t, err) + require.NotNil(t, stats) + require.Equal(t, mockClient.GetRetrievalReturns()["foo"].ResultStats, stats) + } else { + if tc.unexpectedSegment { + require.ErrorContains(t, err, "unexpected path segment visit") + } else { + require.ErrorContains(t, err, "failed to traverse full path") + } + } + + // make sure we performed the retrievals we expected + rr := mockClient.VerifyReceivedRetrievalFrom(ctx, t, peerFoo) + require.NotNil(t, rr) + }) + } +} + +func TestGraphsyncDuplicateRetreivals(t *testing.T) { retrievalID := types.RetrievalID(uuid.New()) peerFoo := peer.ID("foo") peerBar := peer.ID("bar") @@ -1053,20 +1088,7 @@ func TestDuplicateRetreivals(t *testing.T) { }, } - // we're testing the actual Session implementation, but we also want - // to observe, so MockSession WithActual lets us do that. - scfg := session.DefaultConfig(). - WithDefaultProviderConfig(session.ProviderConfig{ - RetrievalTimeout: time.Second, - }). - WithConnectTimeAlpha(0.0). // only use the last connect time - WithoutRandomness() - _session := session.NewSession(scfg, true) - session := testutil.NewMockSession(ctx) - session.WithActual(_session) - session.RegisterRetrieval(retrievalID, cid.Undef, basicnode.NewBool(true)) - session.AddToRetrieval(retrievalID, []peer.ID{peerFoo, peerBar, peerBaz}) - + session := setupTestSession(ctx, retrievalID, []peer.ID{peerFoo, peerBar, peerBaz}) cfg := retriever.NewGraphsyncRetrieverWithConfig(session, mockClient, clock, initialPause, true) results := testutil.RetrievalVerifier{ @@ -1103,3 +1125,21 @@ func makeAsyncCandidates(t *testing.T, candidates []types.RetrievalCandidate) ty close(outgoing) return incoming } + +func makeTestSession() *session.Config { + return session.DefaultConfig(). + WithDefaultProviderConfig(session.ProviderConfig{ + RetrievalTimeout: time.Second, + }). + WithConnectTimeAlpha(0.0). // only use the last connect time + WithoutRandomness() +} + +func setupTestSession(ctx context.Context, retrievalID types.RetrievalID, peers []peer.ID) *testutil.MockSession { + _session := session.NewSession(makeTestSession(), true) + session := testutil.NewMockSession(ctx) + session.WithActual(_session) + session.RegisterRetrieval(retrievalID, cid.Undef, basicnode.NewString("r")) + session.AddToRetrieval(retrievalID, peers) + return session +} diff --git a/pkg/retriever/httpretriever.go b/pkg/retriever/httpretriever.go index 0d85f951..b14a078d 100644 --- a/pkg/retriever/httpretriever.go +++ b/pkg/retriever/httpretriever.go @@ -15,6 +15,7 @@ import ( "github.com/filecoin-project/lassie/pkg/types" "github.com/filecoin-project/lassie/pkg/verifiedcar" "github.com/ipfs/go-cid" + "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipni/go-libipni/metadata" "github.com/multiformats/go-multicodec" ) @@ -126,6 +127,7 @@ func (ph *ProtocolHttp) Retrieve( Selector: retrieval.request.GetSelector(), ExpectDuplicatesIn: true, MaxBlocks: retrieval.request.MaxBlocks, + ExpectPath: datamodel.ParsePath(retrieval.request.Path), } blockCount, byteCount, err := cfg.VerifyCar(ctx, rdr, retrieval.request.LinkSystem) diff --git a/pkg/retriever/parallelpeerretriever.go b/pkg/retriever/parallelpeerretriever.go index 7d8248a0..64530b03 100644 --- a/pkg/retriever/parallelpeerretriever.go +++ b/pkg/retriever/parallelpeerretriever.go @@ -351,7 +351,7 @@ func (retrieval *retrieval) runRetrievalCandidate( if retrievalErr != nil { // Exclude the case where the context was cancelled by the parent, which likely // means that another protocol has succeeded. - if ctx.Err() == nil || !errors.Is(err, context.Canceled) { + if ctx.Err() == nil || !errors.Is(retrievalErr, context.Canceled) { msg := retrievalErr.Error() if errors.Is(retrievalErr, ErrRetrievalTimedOut) { msg = fmt.Sprintf("timeout after %s", timeout) diff --git a/pkg/retriever/retriever.go b/pkg/retriever/retriever.go index 487878ee..cb88ebf6 100644 --- a/pkg/retriever/retriever.go +++ b/pkg/retriever/retriever.go @@ -28,6 +28,7 @@ var ( ErrProposalCreationFailed = errors.New("proposal creation failed") ErrRetrievalRegistrationFailed = errors.New("retrieval registration failed") ErrRetrievalFailed = errors.New("retrieval failed") + ErrRetrievalVerificationFailed = errors.New("retrieval verification failed") ErrAllRetrievalsFailed = errors.New("all retrievals failed") ErrConnectFailed = errors.New("unable to connect to provider") ErrAllQueriesFailed = errors.New("all queries failed") diff --git a/pkg/server/http/ipfs.go b/pkg/server/http/ipfs.go index 3ea6d477..e3b76904 100644 --- a/pkg/server/http/ipfs.go +++ b/pkg/server/http/ipfs.go @@ -83,6 +83,12 @@ func ipfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.Response return } + byteRange, err := ParseByteRange(req) + if err != nil { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + return + } + protocols, err := parseProtocols(req) if err != nil { errorResponse(res, statusLogger, http.StatusBadRequest, err) @@ -124,7 +130,7 @@ func ipfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.Response tempStore := storage.NewDeferredStorageCar(cfg.TempDir, rootCid) var carWriter storage.DeferredWriter if includeDupes { - carWriter = storage.NewDuplicateAdderCarForStream(req.Context(), rootCid, path.String(), dagScope, tempStore, res) + carWriter = storage.NewDuplicateAdderCarForStream(req.Context(), rootCid, path.String(), dagScope, byteRange, tempStore, res) } else { carWriter = storage.NewDeferredCarWriterForStream(rootCid, res) } @@ -136,7 +142,7 @@ func ipfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.Response }() var store types.ReadableWritableStorage = carStore - request, err := types.NewRequestForPath(store, rootCid, path.String(), dagScope) + request, err := types.NewRequestForPath(store, rootCid, path.String(), dagScope, byteRange) if err != nil { errorResponse(res, statusLogger, http.StatusInternalServerError, fmt.Errorf("failed to create request: %w", err)) return @@ -187,7 +193,22 @@ func ipfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.Response } // servertiming metrics - logger.Debugw("fetching CID", "retrievalId", retrievalId, "CID", rootCid.String(), "path", path.String(), "dagScope", dagScope) + logger.Debugw("fetching CID", + "retrievalId", + retrievalId, + "CID", + rootCid.String(), + "path", + path.String(), + "dagScope", + dagScope, + "byteRange", + byteRange, + "includeDupes", + includeDupes, + "blockLimit", + blockLimit, + ) stats, err := fetcher.Fetch(req.Context(), request, servertimingsSubscriber(req)) // force all blocks to flush diff --git a/pkg/server/http/ipfs_test.go b/pkg/server/http/ipfs_test.go index 9a58af19..d93fe5e2 100644 --- a/pkg/server/http/ipfs_test.go +++ b/pkg/server/http/ipfs_test.go @@ -134,6 +134,14 @@ func TestIpfsHandler(t *testing.T) { wantStatus: http.StatusBadRequest, wantBody: "invalid providers parameter\n", }, + { + name: "400 on invalid entity-bytes query parameter", + method: "GET", + path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4?entity-bytes=invalid", + headers: map[string]string{"Accept": "application/vnd.ipld.car"}, + wantStatus: http.StatusBadRequest, + wantBody: "invalid entity-bytes parameter\n", + }, { name: "404 when no candidates can be found", method: "GET", diff --git a/pkg/server/http/util.go b/pkg/server/http/util.go index 13be0574..93d033b0 100644 --- a/pkg/server/http/util.go +++ b/pkg/server/http/util.go @@ -14,15 +14,10 @@ import ( // parameter is not one of the supported values. func ParseScope(req *http.Request) (types.DagScope, error) { if req.URL.Query().Has("dag-scope") { - switch req.URL.Query().Get("dag-scope") { - case "all": - return types.DagScopeAll, nil - case "entity": - return types.DagScopeEntity, nil - case "block": - return types.DagScopeBlock, nil - default: - return types.DagScopeAll, errors.New("invalid dag-scope parameter") + if ds, err := types.ParseDagScope(req.URL.Query().Get("dag-scope")); err != nil { + return ds, errors.New("invalid dag-scope parameter") + } else { + return ds, nil } } // check for legacy param name -- to do -- delete once we confirm this isn't used any more @@ -41,6 +36,20 @@ func ParseScope(req *http.Request) (types.DagScope, error) { return types.DagScopeAll, nil } +// ParseByteRange returns the entity-bytes query parameter if one is set in the +// query string or nil if one is not set. An error is returned if an +// entity-bytes query string is not a valid byte range. +func ParseByteRange(req *http.Request) (*types.ByteRange, error) { + if req.URL.Query().Has("entity-bytes") { + br, err := types.ParseByteRange(req.URL.Query().Get("entity-bytes")) + if err != nil { + return nil, errors.New("invalid entity-bytes parameter") + } + return &br, nil + } + return nil, nil +} + // ParseFilename returns the filename query parameter or an error if the filename // extension is not ".car". Lassie only supports returning CAR data. // See https://specs.ipfs.tech/http-gateways/path-gateway/#filename-request-query-parameter diff --git a/pkg/storage/duplicateaddercar.go b/pkg/storage/duplicateaddercar.go index df11ef17..5ca8f0a5 100644 --- a/pkg/storage/duplicateaddercar.go +++ b/pkg/storage/duplicateaddercar.go @@ -14,6 +14,7 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" ) @@ -24,13 +25,23 @@ type DuplicateAdderCar struct { root cid.Cid path string scope types.DagScope + bytes *types.ByteRange store *DeferredStorageCar blockStream *blockStream streamCompletion chan error streamCompletionLk sync.Mutex } -func NewDuplicateAdderCarForStream(ctx context.Context, root cid.Cid, path string, scope types.DagScope, store *DeferredStorageCar, outStream io.Writer) *DuplicateAdderCar { +func NewDuplicateAdderCarForStream( + ctx context.Context, + root cid.Cid, + path string, + scope types.DagScope, + bytes *types.ByteRange, + store *DeferredStorageCar, + outStream io.Writer, +) *DuplicateAdderCar { + blockStream := &blockStream{ctx: ctx, seen: make(map[cid.Cid]struct{})} blockStream.blockBuffer = list.New() blockStream.cond = sync.NewCond(&blockStream.mu) @@ -43,6 +54,7 @@ func NewDuplicateAdderCarForStream(ctx context.Context, root cid.Cid, path strin root: root, path: path, scope: scope, + bytes: bytes, store: store, blockStream: blockStream, } @@ -53,13 +65,14 @@ func (da *DuplicateAdderCar) addDupes() { defer func() { da.streamCompletion <- err }() - sel := types.PathScopeSelector(da.path, da.scope) + sel := types.PathScopeSelector(da.path, da.scope, da.bytes) // we're going to do a verified car where we add dupes back in cfg := verifiedcar.Config{ Root: da.root, Selector: sel, WriteDuplicatesOut: true, + ExpectPath: datamodel.ParsePath(da.path), } lsys := cidlink.DefaultLinkSystem() @@ -148,7 +161,7 @@ func (bs *blockStream) WriteBlock(blk blocks.Block) error { return nil } -func (bs *blockStream) Next() (blocks.Block, error) { +func (bs *blockStream) Next(ctx context.Context) (blocks.Block, error) { bs.mu.Lock() defer bs.mu.Unlock() @@ -156,6 +169,8 @@ func (bs *blockStream) Next() (blocks.Block, error) { select { case <-bs.ctx.Done(): return nil, bs.ctx.Err() + case <-ctx.Done(): + return nil, ctx.Err() default: } if e := bs.blockBuffer.Front(); e != nil { diff --git a/pkg/storage/duplicateaddercar_test.go b/pkg/storage/duplicateaddercar_test.go index ba506385..29d03261 100644 --- a/pkg/storage/duplicateaddercar_test.go +++ b/pkg/storage/duplicateaddercar_test.go @@ -19,7 +19,6 @@ import ( ) func TestDuplicateAdderCar(t *testing.T) { - setupStore := &testutil.CorrectedMemStore{ParentStore: &memstore.Store{ Bag: make(map[string][]byte), }} @@ -34,7 +33,7 @@ func TestDuplicateAdderCar(t *testing.T) { store := storage.NewDeferredStorageCar("", unixfsFileWithDups.Root) ctx := context.Background() - carWriter := storage.NewDuplicateAdderCarForStream(ctx, unixfsFileWithDups.Root, "", types.DagScopeAll, store, buf) + carWriter := storage.NewDuplicateAdderCarForStream(ctx, unixfsFileWithDups.Root, "", types.DagScopeAll, nil, store, buf) cachingTempStore := storage.NewCachingTempStore(carWriter.BlockWriteOpener(), store) // write the root block, containing sharding metadata diff --git a/pkg/types/request.go b/pkg/types/request.go index 20dcedb7..e601281d 100644 --- a/pkg/types/request.go +++ b/pkg/types/request.go @@ -3,6 +3,7 @@ package types import ( "errors" "fmt" + "math" "strconv" "strings" @@ -13,7 +14,10 @@ import ( "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/datamodel" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" ipldstorage "github.com/ipld/go-ipld-prime/storage" + "github.com/ipld/go-ipld-prime/traversal/selector" + "github.com/ipld/go-ipld-prime/traversal/selector/builder" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" ) @@ -72,6 +76,10 @@ type RetrievalRequest struct { // is not set, Scope and Path will be used to construct a selector. Scope DagScope + // Bytes is the optional byte range within the DAG to fetch. If not set + // the default byte range will fetch the entire file. + Bytes *ByteRange + // Duplicates is a flag that indicates whether duplicate blocks should be // stored into the LinkSystem where they occur in the traversal. Duplicates bool @@ -101,7 +109,14 @@ type RetrievalRequest struct { // and writing and it is explicitly set to be trusted (i.e. it will not // check CIDs match bytes). If the storage is not truested, // request.LinkSystem.TrustedStore should be set to false after this call. -func NewRequestForPath(store ipldstorage.WritableStorage, cid cid.Cid, path string, dagScope DagScope) (RetrievalRequest, error) { +func NewRequestForPath( + store ipldstorage.WritableStorage, + cid cid.Cid, + path string, + dagScope DagScope, + byteRange *ByteRange, +) (RetrievalRequest, error) { + retrievalId, err := NewRetrievalID() if err != nil { return RetrievalRequest{}, err @@ -120,13 +135,48 @@ func NewRequestForPath(store ipldstorage.WritableStorage, cid cid.Cid, path stri Cid: cid, Path: path, Scope: dagScope, + Bytes: byteRange, LinkSystem: linkSystem, }, nil } -func PathScopeSelector(path string, scope DagScope) ipld.Node { +// PathScopeSelector generates a selector for the given path, scope and byte +// range. Use DefaultByteRange() for the default byte range value if none is +// specified. +func PathScopeSelector(path string, scope DagScope, bytes *ByteRange) ipld.Node { // Turn the path / scope into a selector - return unixfsnode.UnixFSPathSelectorBuilder(path, scope.TerminalSelectorSpec(), false) + terminal := scope.TerminalSelectorSpec() + if !bytes.IsDefault() { + var to int64 = math.MaxInt64 + if bytes.To != nil { + to = *bytes.To + if to > 0 { + to++ // selector is exclusive, so increment the end + } + } + ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any) + // if we reach a terminal and it's not a file, then we need to fall-back to the default + // selector for the given scope. We do this with a union of the original terminal. + if scope == DagScopeEntity { + // entity is a special case which we can't just union with our matcher because it + // has its own matcher in it which we need to replace with the subset matcher. + terminal = ssb.ExploreInterpretAs("unixfs", + ssb.ExploreUnion( + ssb.MatcherSubset(bytes.From, to), + ssb.ExploreRecursive( + selector.RecursionLimitDepth(1), + ssb.ExploreAll(ssb.ExploreRecursiveEdge()), + ), + ), + ) + } else { + terminal = ssb.ExploreUnion( + ssb.ExploreInterpretAs("unixfs", ssb.MatcherSubset(bytes.From, to)), + terminal, + ) + } + } + return unixfsnode.UnixFSPathSelectorBuilder(path, terminal, false) } // GetSelector will safely return a selector for this request. If none has been @@ -135,7 +185,7 @@ func (r RetrievalRequest) GetSelector() ipld.Node { if r.Selector != nil { // custom selector return r.Selector } - return PathScopeSelector(r.Path, r.Scope) + return PathScopeSelector(r.Path, r.Scope, r.Bytes) } // GetUrlPath returns a URL path and query string valid with the Trusted HTTP @@ -155,11 +205,15 @@ func (r RetrievalRequest) GetUrlPath() (string, error) { if legacyScope == string(DagScopeEntity) { legacyScope = "file" } + byteRange := "" + if !r.Bytes.IsDefault() { + byteRange = "&entity-bytes=" + r.Bytes.String() + } path := r.Path if path != "" { path = "/" + path } - return fmt.Sprintf("%s?dag-scope=%s&car-scope=%s", path, scope, legacyScope), nil + return fmt.Sprintf("%s?dag-scope=%s&car-scope=%s%s", path, scope, legacyScope, byteRange), nil } // GetSupportedProtocols will safely return the supported protocols for a specific request. @@ -183,23 +237,31 @@ func (r RetrievalRequest) GetSupportedProtocols(allSupportedProtocols []multicod } func (r RetrievalRequest) Etag() string { - // https://github.com/ipfs/boxo/pull/303/commits/f61f95481041406df46a1781b1daab34b6605650#r1213918777 + // similar, but extended form of: + // https://github.com/ipfs/boxo/blob/a91e44dbdbd4c36a5b25a1b9df6ee237aa4442d2/gateway/handler_car.go#L167-L184 sb := strings.Builder{} sb.WriteString("/ipfs/") sb.WriteString(r.Cid.String()) if r.Path != "" { - sb.WriteString("/") + sb.WriteRune('/') sb.WriteString(datamodel.ParsePath(r.Path).String()) } if r.Scope != DagScopeAll { - sb.WriteString(".") + sb.WriteRune('.') sb.WriteString(string(r.Scope)) } + if !r.Bytes.IsDefault() { + sb.WriteRune('.') + sb.WriteString(strconv.FormatInt(r.Bytes.From, 10)) + if r.Bytes.To != nil { + sb.WriteRune('.') + sb.WriteString(strconv.FormatInt(*r.Bytes.To, 10)) + } + } if r.Duplicates { sb.WriteString(".dups") } sb.WriteString(".dfs") - // range bytes would go here: `.from.to` suffix := strconv.FormatUint(xxhash.Sum64([]byte(sb.String())), 32) return `"` + r.Cid.String() + ".car." + suffix + `"` } diff --git a/pkg/types/request_test.go b/pkg/types/request_test.go index 174cf7ae..554e6b3a 100644 --- a/pkg/types/request_test.go +++ b/pkg/types/request_test.go @@ -22,6 +22,7 @@ func TestEtag(t *testing.T) { cid cid.Cid path string scope types.DagScope + bytes *types.ByteRange dups bool expected string }{ @@ -115,14 +116,56 @@ func TestEtag(t *testing.T) { scope: types.DagScopeAll, expected: `"bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk.car.9lumqv26cg30t"`, }, + { + cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), + scope: types.DagScopeAll, + bytes: &types.ByteRange{From: 0}, // default, not included + expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.58mf8vcmd2eo8"`, + }, + { + cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), + scope: types.DagScopeAll, + bytes: &types.ByteRange{From: 10}, + expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.560ditjelh0u2"`, + }, + { + cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), + scope: types.DagScopeAll, + bytes: &types.ByteRange{From: 0, To: ptr(200)}, + expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.faqf14andvfmb"`, + }, + { + cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), + scope: types.DagScopeAll, + bytes: &types.ByteRange{From: 100, To: ptr(200)}, + expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.bvebrb14stt94"`, + }, + { + cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), + scope: types.DagScopeEntity, + bytes: &types.ByteRange{From: 100, To: ptr(200)}, + expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.bq3u6t9t877t3"`, + }, + { + cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), + scope: types.DagScopeEntity, + dups: true, + bytes: &types.ByteRange{From: 100, To: ptr(200)}, + expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.fhf498an52uqb"`, + }, } for _, tc := range testCases { - t.Run(fmt.Sprintf("%s:%s:%s:%v", tc.cid.String(), tc.path, tc.scope, tc.dups), func(t *testing.T) { + br := "" + if tc.bytes != nil { + br = ":" + tc.bytes.String() + } + t.Run(fmt.Sprintf("%s:%s:%s:%v%s", tc.cid.String(), tc.path, tc.scope, tc.dups, br), func(t *testing.T) { rr := types.RetrievalRequest{ Cid: tc.cid, Path: tc.path, Scope: tc.scope, + Bytes: tc.bytes, Duplicates: tc.dups, } actual := rr.Etag() @@ -132,3 +175,7 @@ func TestEtag(t *testing.T) { }) } } + +func ptr(i int64) *int64 { + return &i +} diff --git a/pkg/types/types.go b/pkg/types/types.go index cf7d3c1f..99bd87f4 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -5,6 +5,8 @@ import ( "errors" "fmt" "net/url" + "strconv" + "strings" "time" "github.com/filecoin-project/go-state-types/abi" @@ -275,7 +277,7 @@ func (ds DagScope) TerminalSelectorSpec() builder.SelectorSpec { case DagScopeAll: return unixfsnode.ExploreAllRecursivelySelector case DagScopeEntity: - return unixfsnode.MatchUnixFSPreloadSelector // file + return unixfsnode.MatchUnixFSEntitySelector case DagScopeBlock: return matcherSelector case DagScope(""): @@ -284,6 +286,70 @@ func (ds DagScope) TerminalSelectorSpec() builder.SelectorSpec { panic(fmt.Sprintf("unknown DagScope: [%s]", string(ds))) } +func ParseDagScope(s string) (DagScope, error) { + switch s { + case "all": + return DagScopeAll, nil + case "entity": + return DagScopeEntity, nil + case "block": + return DagScopeBlock, nil + default: + return DagScopeAll, errors.New("invalid dag-scope") + } +} + func (ds DagScope) AcceptHeader() string { return "application/vnd.ipld.car;version=1;order=dfs;dups=y" } + +// ByteRange represents a range of bytes in a file. The default value is 0 to +// the end of the file, [0:*]. +// The range is inclusive at both ends, so the case of From==To selects a single +// byte. +// Where the end is * or beyond the end of the file, the end of the file is +// selected. +type ByteRange struct { + From int64 + To *int64 +} + +// IsDefault is roughly equivalent to the range matching [0:*] +func (br *ByteRange) IsDefault() bool { + return br == nil || br.From == 0 && br.To == nil +} + +func (br *ByteRange) String() string { + if br.IsDefault() { + return "0:*" + } + to := "*" // default to end of file + if br.To != nil { + to = strconv.FormatInt(*br.To, 10) + } + return fmt.Sprintf("%d:%s", br.From, to) +} + +func ParseByteRange(s string) (ByteRange, error) { + br := ByteRange{} + if s == "" { + return br, nil + } + parts := strings.Split(s, ":") + if len(parts) != 2 { + return br, fmt.Errorf("invalid entity-bytes: %s", s) + } + var err error + br.From, err = strconv.ParseInt(parts[0], 10, 64) + if err != nil { + return br, fmt.Errorf("invalid entity-bytes: %s (%w)", s, err) + } + if parts[1] != "*" { + to, err := strconv.ParseInt(parts[1], 10, 64) + if err != nil { + return br, fmt.Errorf("invalid entity-bytes: %s (%w)", s, err) + } + br.To = &to + } + return br, nil +} diff --git a/pkg/verifiedcar/verifiedcar.go b/pkg/verifiedcar/verifiedcar.go index 03a4c4e6..7ec1295d 100644 --- a/pkg/verifiedcar/verifiedcar.go +++ b/pkg/verifiedcar/verifiedcar.go @@ -15,6 +15,7 @@ import ( _ "github.com/ipld/go-ipld-prime/codec/dagjson" _ "github.com/ipld/go-ipld-prime/codec/json" _ "github.com/ipld/go-ipld-prime/codec/raw" + "github.com/ipld/go-ipld-prime/traversal/selector" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" @@ -24,9 +25,9 @@ import ( "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/linking/preload" "github.com/ipld/go-ipld-prime/node/basicnode" "github.com/ipld/go-ipld-prime/traversal" - "github.com/ipld/go-ipld-prime/traversal/selector" "go.uber.org/multierr" ) @@ -39,8 +40,8 @@ var ( ErrMissingBlock = errors.New("missing block in CAR") ) -type BlockReader interface { - Next() (blocks.Block, error) +type BlockStream interface { + Next(ctx context.Context) (blocks.Block, error) } var protoChooser = dagpb.AddSupportToChooser(basicnode.Chooser) @@ -53,10 +54,9 @@ type Config struct { ExpectDuplicatesIn bool // Handles whether the incoming stream has duplicates WriteDuplicatesOut bool // Handles whether duplicates should be written a second time as blocks MaxBlocks uint64 // set a budget for the traversal + ExpectPath datamodel.Path // sets the expected IPLD path that the traversal should take, if set, this is used to determine whether the full expected traversal occurred } -func visitNoop(p traversal.Progress, n datamodel.Node, r traversal.VisitReason) error { return nil } - // Verify reads a CAR from the provided reader, verifies the contents are // strictly what is specified by this Config and writes the blocks to the // provided BlockWriteOpener. It returns the number of blocks and bytes @@ -88,30 +88,57 @@ func (cfg Config) VerifyCar(ctx context.Context, rdr io.Reader, lsys linking.Lin if cfg.CheckRootsMismatch && (len(cbr.Roots) != 1 || cbr.Roots[0] != cfg.Root) { return 0, 0, ErrBadRoots } - return cfg.VerifyBlockStream(ctx, cbr, lsys) + return cfg.VerifyBlockStream(ctx, blockReaderStream{cbr}, lsys) } -func (cfg Config) VerifyBlockStream(ctx context.Context, cbr BlockReader, lsys linking.LinkSystem) (uint64, uint64, error) { - sel, err := selector.CompileSelector(cfg.Selector) - if err != nil { +func (cfg Config) VerifyBlockStream(ctx context.Context, bs BlockStream, lsys linking.LinkSystem) (uint64, uint64, error) { + cr := &carReader{bs} + bt := &writeTracker{} + lsys.TrustedStorage = true // we can rely on the CAR decoder to check CID integrity + unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) + lsys.StorageReadOpener = cfg.nextBlockReadOpener(ctx, cr, bt, lsys) + + // perform the traversal + if err := cfg.Traverse(ctx, lsys, nil); err != nil { return 0, 0, err } - cr := &carReader{ - cbr: cbr, + // make sure we don't have any extraneous data beyond what the traversal needs + _, err := bs.Next(ctx) + if err == nil { + return 0, 0, ErrExtraneousBlock + } else if !errors.Is(err, io.EOF) { + return 0, 0, err + } + + // wait for parser to finish and provide errors or stats + return bt.blocks, bt.bytes, nil +} + +// Traverse performs a traversal using the Config's Selector, starting at the +// Config's Root, using the provided LinkSystem and optional Preloader. +// +// The traversal will capture any errors that occur during traversal, block +// loading and will account for the Config's ExpectPath property, if set, to +// ensure that the full path-based traversal has occurred. +func (cfg Config) Traverse( + ctx context.Context, + lsys linking.LinkSystem, + preloader preload.Loader, +) error { + sel, err := selector.CompileSelector(cfg.Selector) + if err != nil { + return err } - bt := &writeTracker{} - lsys.TrustedStorage = true // we can rely on the CAR decoder to check CID integrity - unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) - nbls, lsys := NewNextBlockLinkSystem(ctx, cfg, cr, bt, lsys) + lsys, ecr := NewErrorCapturingReader(lsys) - // run traversal in this goroutine progress := traversal.Progress{ Cfg: &traversal.Config{ Ctx: ctx, LinkSystem: lsys, LinkTargetNodePrototypeChooser: protoChooser, + Preloader: preloader, }, } if cfg.MaxBlocks > 0 { @@ -120,54 +147,71 @@ func (cfg Config) VerifyBlockStream(ctx context.Context, cbr BlockReader, lsys l NodeBudget: math.MaxInt64, } } - lc := linking.LinkContext{Ctx: ctx} - lnk := cidlink.Link{Cid: cfg.Root} - proto, err := protoChooser(lnk, lc) + + rootNode, err := loadNode(ctx, cfg.Root, lsys) if err != nil { - return 0, 0, err + return fmt.Errorf("failed to load root node: %w", err) } - rootNode, err := lsys.Load(lc, lnk, proto) - if err != nil { - return 0, 0, err + progress.LastBlock.Link = cidlink.Link{Cid: cfg.Root} + + var lastPath datamodel.Path + visitor := func(p traversal.Progress, n datamodel.Node, vr traversal.VisitReason) error { + lastPath = p.Path + if vr == traversal.VisitReason_SelectionMatch { + return unixfsnode.BytesConsumingMatcher(p, n) + } + return nil } - if err := progress.WalkAdv(rootNode, sel, visitNoop); err != nil { - return 0, 0, traversalError(err) + + if err := progress.WalkAdv(rootNode, sel, visitor); err != nil { + return traversalError(err) } - if nbls.Error != nil { - // capture any errors not bubbled up through the traversal, i.e. see - // https://github.com/ipld/go-ipld-prime/pull/524 - return 0, 0, nbls.Error + if err := CheckTraversalPath(cfg.ExpectPath, lastPath); err != nil { + return err } - // make sure we don't have any extraneous data beyond what the traversal needs - _, err = cbr.Next() - if err == nil { - return 0, 0, ErrExtraneousBlock - } else if !errors.Is(err, io.EOF) { - return 0, 0, err + if ecr.Error != nil { + return fmt.Errorf("block load failed during traversal: %w", ecr.Error) } + return nil +} - // wait for parser to finish and provide errors or stats - return bt.blocks, bt.bytes, nil +func CheckTraversalPath(expectPath datamodel.Path, lastTraversalPath datamodel.Path) error { + for expectPath.Len() > 0 { + if lastTraversalPath.Len() == 0 { + return fmt.Errorf("failed to traverse full path, missed: [%s]", expectPath.String()) + } + var seg, lastSeg datamodel.PathSegment + seg, expectPath = expectPath.Shift() + lastSeg, lastTraversalPath = lastTraversalPath.Shift() + if seg != lastSeg { + return fmt.Errorf("unexpected path segment visit, got [%s], expected [%s]", lastSeg.String(), seg.String()) + } + } + // having lastTraversalPath.Len()>0 is fine, it may be due to an "all" or + // "entity" doing an explore-all on the remainder of the DAG after the path; + // or it could be because ExpectPath was empty. + return nil } -type NextBlockLinkSystem struct { - Error error +func loadNode(ctx context.Context, rootCid cid.Cid, lsys linking.LinkSystem) (datamodel.Node, error) { + lnk := cidlink.Link{Cid: rootCid} + lnkCtx := linking.LinkContext{Ctx: ctx} + proto, err := protoChooser(lnk, lnkCtx) + if err != nil { + return nil, fmt.Errorf("failed to choose prototype for CID %s: %w", rootCid.String(), err) + } + rootNode, err := lsys.Load(lnkCtx, lnk, proto) + if err != nil { + return nil, fmt.Errorf("failed to load root CID: %w", err) + } + return rootNode, nil } -func NewNextBlockLinkSystem( - ctx context.Context, - cfg Config, - cr *carReader, - bt *writeTracker, - lsys linking.LinkSystem, -) (*NextBlockLinkSystem, linking.LinkSystem) { - nbls := &NextBlockLinkSystem{} +func (cfg *Config) nextBlockReadOpener(ctx context.Context, cr *carReader, bt *writeTracker, lsys linking.LinkSystem) linking.BlockReadOpener { seen := make(map[cid.Cid]struct{}) - storageReadOpener := lsys.StorageReadOpener - - nextBlockReadOpener := func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { + return func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { cid := l.(cidlink.Link).Cid var data []byte var err error @@ -183,7 +227,7 @@ func NewNextBlockLinkSystem( } } else { // duplicate block, rely on the supplied LinkSystem to have stored this - rdr, err := storageReadOpener(lc, l) + rdr, err := lsys.StorageReadOpener(lc, l) if !cfg.WriteDuplicatesOut { return rdr, err } @@ -216,26 +260,14 @@ func NewNextBlockLinkSystem( } return io.NopCloser(rdr), nil } - - // wrap nextBlockReadOpener in one that captures errors on `nbls` - lsys.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { - rdr, err := nextBlockReadOpener(lc, l) - if err != nil { - nbls.Error = err - return nil, err - } - return rdr, nil - } - - return nbls, lsys } type carReader struct { - cbr BlockReader + bs BlockStream } func (cr *carReader) readNextBlock(ctx context.Context, expected cid.Cid) ([]byte, error) { - blk, err := cr.cbr.Next() + blk, err := cr.bs.Next(ctx) if err != nil { if errors.Is(err, io.EOF) { return nil, format.ErrNotFound{Cid: expected} @@ -271,3 +303,37 @@ func traversalError(original error) error { } } } + +// ErrorCapturingReader captures any errors that occur during block loading +// and makes them available via the Error property. +// +// This is useful for capturing errors that occur during traversal, which are +// not currently surfaced by the traversal package, see: +// +// https://github.com/ipld/go-ipld-prime/pull/524 +type ErrorCapturingReader struct { + sro linking.BlockReadOpener + Error error +} + +func NewErrorCapturingReader(lsys linking.LinkSystem) (linking.LinkSystem, *ErrorCapturingReader) { + ecr := &ErrorCapturingReader{sro: lsys.StorageReadOpener} + lsys.StorageReadOpener = ecr.StorageReadOpener + return lsys, ecr +} + +func (ecr *ErrorCapturingReader) StorageReadOpener(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { + r, err := ecr.sro(lc, l) + if err != nil { + ecr.Error = err + } + return r, err +} + +type blockReaderStream struct { + cbr *car.BlockReader +} + +func (brs blockReaderStream) Next(ctx context.Context) (blocks.Block, error) { + return brs.cbr.Next() +} diff --git a/pkg/verifiedcar/verifiedcar_test.go b/pkg/verifiedcar/verifiedcar_test.go index 40698131..1e0d3b83 100644 --- a/pkg/verifiedcar/verifiedcar_test.go +++ b/pkg/verifiedcar/verifiedcar_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "errors" + "fmt" "io" "math/rand" "os" @@ -11,6 +12,7 @@ import ( "time" "github.com/filecoin-project/lassie/pkg/internal/testutil" + "github.com/filecoin-project/lassie/pkg/types" "github.com/filecoin-project/lassie/pkg/verifiedcar" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" @@ -19,16 +21,112 @@ import ( unixfs "github.com/ipfs/go-unixfsnode/testutil" "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/storage" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec/dagjson" "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" "github.com/ipld/go-ipld-prime/storage/memstore" "github.com/ipld/go-ipld-prime/traversal" + "github.com/ipld/go-ipld-prime/traversal/selector" + "github.com/ipld/go-ipld-prime/traversal/selector/builder" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + trustlesspathing "github.com/ipld/ipld/specs/pkg-go/trustless-pathing" "github.com/stretchr/testify/require" ) +func TestUnixfs20mVariety(t *testing.T) { + req := require.New(t) + + testCases, err := trustlesspathing.Unixfs20mVarietyCases() + req.NoError(err) + storage, closer, err := trustlesspathing.Unixfs20mVarietyReadableStorage() + req.NoError(err) + defer closer.Close() + + lsys := cidlink.DefaultLinkSystem() + lsys.TrustedStorage = true + unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) + lsys.SetReadStorage(storage) + + for _, tc := range testCases { + t.Run(tc.Name, func(t *testing.T) { + req := require.New(t) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + t.Logf("query=%s, blocks=%d", tc.AsQuery(), len(tc.ExpectedCids)) + + // tc.ExpectedCids is in the order we expect to see them in a properly + // formed trustless CAR for the given query. So we build our list of + // expected blocks in that order and feed it through makeCarStream to + // produce the expected CAR. + expectedBlocks := make([]expectedBlock, len(tc.ExpectedCids)) + for ii, ec := range tc.ExpectedCids { + byt, err := lsys.LoadRaw(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: ec}) + req.NoError(err) + blk, err := blocks.NewBlockWithCid(byt, ec) + req.NoError(err) + expectedBlocks[ii] = expectedBlock{blk, false} + } + + carStream, errorCh := makeCarStream(t, ctx, []cid.Cid{tc.Root}, expectedBlocks, false, false, false, nil) + + lsys := cidlink.DefaultLinkSystem() + var writeCounter int + lsys.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { + var buf bytes.Buffer + return &buf, func(l datamodel.Link) error { + req.Equal(expectedBlocks[writeCounter].Cid().String(), l.(cidlink.Link).Cid.String(), "block %d", writeCounter) + req.Equal(expectedBlocks[writeCounter].RawData(), buf.Bytes(), "block %d", writeCounter) + writeCounter++ + return nil + }, nil + } + lsys.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { + return nil, fmt.Errorf("unexpected read of %s", l.String()) + } + + // Run the verifier over the CAR stream to see if we end up with + // the same query. + scope, err := types.ParseDagScope(tc.Scope) + req.NoError(err) + var byteRange *types.ByteRange + if tc.ByteRange != "" { + br, err := types.ParseByteRange(tc.ByteRange) + req.NoError(err) + byteRange = &br + } + cfg := verifiedcar.Config{ + Root: tc.Root, + Selector: types.PathScopeSelector(tc.Path, scope, byteRange), + } + { + selBytes, _ := ipld.Encode(cfg.Selector, dagjson.Encode) + t.Logf("selector=%s, entity-bytes=%s", string(selBytes), tc.ByteRange) + } + blockCount, byteCount, err := cfg.VerifyCar(ctx, carStream, lsys) + + req.NoError(err) + req.Equal(count(expectedBlocks), blockCount) + req.Equal(sizeOf(expectedBlocks), byteCount) + req.Equal(int(count(expectedBlocks)), writeCounter) + + select { + case err := <-errorCh: + req.NoError(err) + default: + } + + // Make sure we consumed the entire stream. + byt, err := io.ReadAll(carStream) + req.NoError(err) + req.Equal(0, len(byt)) + }) + } +} + func TestVerifiedCar(t *testing.T) { ctx := context.Background() @@ -59,9 +157,22 @@ func TestVerifiedCar(t *testing.T) { allSelector := selectorparse.CommonSelector_ExploreAllRecursively wrapPath := "/some/path/to/content" + wrapPathPlusMore := wrapPath + "/nope/not/here" unixfsFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.GenerateFile(t, &lsys, rndReader, 4<<20) }) unixfsFileBlocks := testutil.ToBlocks(t, lsys, unixfsFile.Root, allSelector) + unixfsSmallFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.GenerateFile(t, &lsys, rndReader, 1024) }) + + unixfsFileRange0_1048576Blocks := unixfsFileBlocks[0:6] + ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any) + ss := ssb.ExploreInterpretAs("unixfs", ssb.MatcherSubset(0, 1<<20)) + unixfsFileRange0_1048576Selector := ss.Node() + + // need the root plus the byte range of 1M->2M, which happens to include the + // block of the 0->1M range because of overlapping data + unixfsFileRange1048576_2097152Blocks := append(append([]blocks.Block{}, unixfsFileBlocks[0]), unixfsFileBlocks[5:10]...) + ss = ssb.ExploreInterpretAs("unixfs", ssb.MatcherSubset(1<<20, 2<<20)) + unixfsFileRange1048576_2097152Selector := ss.Node() unixfsFileWithDups := unixfs.GenerateFile(t, &lsys, testutil.ZeroReader{}, 4<<20) unixfsFileWithDupsBlocks := testutil.ToBlocks(t, lsys, unixfsFileWithDups.Root, allSelector) @@ -80,15 +191,21 @@ func TestVerifiedCar(t *testing.T) { }) unixfsShardedDirBlocks := testutil.ToBlocks(t, lsys, unixfsShardedDir.Root, allSelector) - unixfsPreloadSelector := unixfsnode.MatchUnixFSPreloadSelector.Node() + unixfsPreloadSelector := unixfsnode.MatchUnixFSEntitySelector.Node() unixfsPreloadDirBlocks := testutil.ToBlocks(t, lsys, unixfsDir.Root, unixfsPreloadSelector) unixfsPreloadShardedDirBlocks := testutil.ToBlocks(t, lsys, unixfsShardedDir.Root, unixfsPreloadSelector) - unixfsDirSubsetSelector := unixfsnode.UnixFSPathSelectorBuilder(unixfsDir.Children[1].Path, unixfsnode.MatchUnixFSPreloadSelector, false) + unixfsDirSubsetSelector := unixfsnode.UnixFSPathSelectorBuilder(unixfsDir.Children[1].Path, unixfsnode.MatchUnixFSEntitySelector, false) unixfsWrappedPathSelector := unixfsnode.UnixFSPathSelectorBuilder(wrapPath, unixfsnode.ExploreAllRecursivelySelector, false) - unixfsWrappedPreloadPathSelector := unixfsnode.UnixFSPathSelectorBuilder(wrapPath, unixfsnode.MatchUnixFSPreloadSelector, false) + unixfsWrappedPreloadPathSelector := unixfsnode.UnixFSPathSelectorBuilder(wrapPath, unixfsnode.MatchUnixFSEntitySelector, false) + preloadSubst := ssb.ExploreInterpretAs("unixfs", ssb.ExploreRecursive( + selector.RecursionLimitDepth(1), + ssb.ExploreAll(ssb.ExploreRecursiveEdge()), + )) + unixfsWrappedPreloadPathSelectorSubst := unixfsnode.UnixFSPathSelectorBuilder(wrapPath, preloadSubst, false) + unixfsWrappedPathPlusMoreSelector := unixfsnode.UnixFSPathSelectorBuilder(wrapPathPlusMore, unixfsnode.ExploreAllRecursivelySelector, false) unixfsWrappedFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.WrapContent(t, rndReader, &lsys, unixfsFile, wrapPath, false) }) unixfsWrappedFileBlocks := testutil.ToBlocks(t, lsys, unixfsWrappedFile.Root, allSelector) @@ -97,6 +214,10 @@ func TestVerifiedCar(t *testing.T) { unixfsTrimmedWrappedFileBlocks := testutil.ToBlocks(t, lsys, unixfsWrappedFile.Root, unixfsWrappedPathSelector) unixfsExclusiveWrappedFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.WrapContent(t, rndReader, &lsys, unixfsFile, wrapPath, true) }) unixfsExclusiveWrappedFileBlocks := testutil.ToBlocks(t, lsys, unixfsExclusiveWrappedFile.Root, allSelector) + unixfsExclusiveWrappedSmallFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { + return unixfs.WrapContent(t, rndReader, &lsys, unixfsSmallFile, wrapPath, true) + }) + unixfsExclusiveWrappedSmallFileBlocks := testutil.ToBlocks(t, lsys, unixfsExclusiveWrappedSmallFile.Root, allSelector) unixfsWrappedShardedDir := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.WrapContent(t, rndReader, &lsys, unixfsShardedDir, wrapPath, false) @@ -115,6 +236,7 @@ func TestVerifiedCar(t *testing.T) { mismatchedCidBlk, _ := blocks.NewBlockWithCid(extraneousByts, allBlocks[99].Cid()) testCases := []struct { name string + skip bool blocks []expectedBlock roots []cid.Cid carv2 bool @@ -426,8 +548,9 @@ func TestVerifiedCar(t *testing.T) { blocks: consumedBlocks(unixfsExclusiveWrappedShardedDirBlocks), roots: []cid.Cid{unixfsExclusiveWrappedShardedDir.Root}, cfg: verifiedcar.Config{ - Root: unixfsExclusiveWrappedShardedDir.Root, - Selector: unixfsWrappedPathSelector, + Root: unixfsExclusiveWrappedShardedDir.Root, + Selector: unixfsWrappedPathSelector, + ExpectPath: datamodel.ParsePath(wrapPath), }, }, { @@ -436,7 +559,7 @@ func TestVerifiedCar(t *testing.T) { roots: []cid.Cid{unixfsExclusiveWrappedShardedDir.Root}, cfg: verifiedcar.Config{ Root: unixfsExclusiveWrappedShardedDir.Root, - Selector: unixfsWrappedPreloadPathSelector, + Selector: unixfsWrappedPreloadPathSelectorSubst, }, }, { @@ -514,11 +637,64 @@ func TestVerifiedCar(t *testing.T) { Selector: allSelector, }, }, + { + name: "unixfs: large sharded file byte range [0:1M]", + blocks: consumedBlocks(unixfsFileRange0_1048576Blocks), + roots: []cid.Cid{unixfsFile.Root}, + cfg: verifiedcar.Config{ + Root: unixfsFile.Root, + Selector: unixfsFileRange0_1048576Selector, + }, + }, + { + name: "unixfs: large sharded file byte range [1M:2M]", + blocks: consumedBlocks(unixfsFileRange1048576_2097152Blocks), + roots: []cid.Cid{unixfsFile.Root}, + cfg: verifiedcar.Config{ + Root: unixfsFile.Root, + Selector: unixfsFileRange1048576_2097152Selector, + }, + }, + { + // pathing beyond the file means we don't do explore-all on the file's blocks + name: "unixfs: large sharded file wrapped in directories, pathed too far, errors", + blocks: consumedBlocks(unixfsExclusiveWrappedFileBlocks), + roots: []cid.Cid{unixfsExclusiveWrappedFile.Root}, + expectErr: "extraneous block in CAR", + cfg: verifiedcar.Config{ + Root: unixfsExclusiveWrappedFile.Root, + Selector: unixfsWrappedPathPlusMoreSelector, + }, + }, + { + name: "unixfs: small single-block file wrapped in directories, with ExpectPath", + blocks: consumedBlocks(unixfsExclusiveWrappedSmallFileBlocks), + roots: []cid.Cid{unixfsExclusiveWrappedSmallFile.Root}, + cfg: verifiedcar.Config{ + Root: unixfsExclusiveWrappedSmallFile.Root, + Selector: unixfsWrappedPathSelector, + ExpectPath: datamodel.ParsePath(wrapPath), + }, + }, + { + name: "unixfs: small single-block file wrapped in directories, pathed too far, errors", + blocks: consumedBlocks(unixfsExclusiveWrappedSmallFileBlocks), + roots: []cid.Cid{unixfsExclusiveWrappedSmallFile.Root}, + expectErr: "failed to traverse full path", + cfg: verifiedcar.Config{ + Root: unixfsExclusiveWrappedSmallFile.Root, + Selector: unixfsWrappedPathPlusMoreSelector, + ExpectPath: datamodel.ParsePath(wrapPathPlusMore), + }, + }, } for _, testCase := range testCases { testCase := testCase t.Run(testCase.name, func(t *testing.T) { + if testCase.skip { + t.Skip() + } t.Parallel() ctx, cancel := context.WithTimeout(ctx, 2*time.Second) @@ -556,12 +732,18 @@ func TestVerifiedCar(t *testing.T) { }, nil } - carStream := makeCarStream(t, ctx, testCase.roots, testCase.blocks, testCase.carv2, testCase.expectErr != "", testCase.incomingHasDups, testCase.streamErr) + carStream, errorCh := makeCarStream(t, ctx, testCase.roots, testCase.blocks, testCase.carv2, testCase.expectErr != "", testCase.incomingHasDups, testCase.streamErr) blockCount, byteCount, err := testCase.cfg.VerifyCar(ctx, carStream, lsys) // read the rest of data io.ReadAll(carStream) + select { + case err := <-errorCh: + req.NoError(err) + default: + } + if testCase.expectErr != "" { req.ErrorContains(err, testCase.expectErr) req.Equal(uint64(0), blockCount) @@ -585,13 +767,12 @@ func makeCarStream( expectErrors bool, allowDuplicatePuts bool, streamError error, -) io.Reader { +) (io.Reader, chan error) { r, w := io.Pipe() + errorCh := make(chan error, 1) go func() { - req := require.New(t) - var carW io.Writer = w var v2f *os.File @@ -600,7 +781,10 @@ func makeCarStream( // can't create a streaming v2 var err error v2f, err = os.CreateTemp(t.TempDir(), "carv2") - req.NoError(err) + if err != nil { + errorCh <- err + return + } t.Cleanup(func() { v2f.Close() os.Remove(v2f.Name()) @@ -609,8 +793,8 @@ func makeCarStream( } carWriter, err := storage.NewWritable(carW, roots, car.WriteAsCarV1(!carv2), car.AllowDuplicatePuts(allowDuplicatePuts)) - req.NoError(err) if err != nil { + errorCh <- err return } for ii, block := range blocks { @@ -619,14 +803,18 @@ func makeCarStream( return } err := carWriter.Put(ctx, block.Cid().KeyString(), block.RawData()) - if !expectErrors { - req.NoError(err) + if !expectErrors && err != nil { + errorCh <- err + return } if ctx.Err() != nil { return } } - req.NoError(carWriter.Finalize()) + if err := carWriter.Finalize(); err != nil { + errorCh <- err + return + } if carv2 { v2f.Seek(0, io.SeekStart) @@ -635,7 +823,9 @@ func makeCarStream( io.Copy(w, v2f) } - req.NoError(w.Close()) + if err := w.Close(); err != nil { + errorCh <- err + } }() go func() { @@ -645,7 +835,7 @@ func makeCarStream( } }() - return r + return r, errorCh } type expectedBlock struct {