From be8a3a57e3562ead548ccd0e89042d56171b4102 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Thu, 4 Aug 2022 19:57:31 +1000 Subject: [PATCH] v1.0.0/filclient-commp-fix branch + patch (#680) * fix: don't allow go-merkledag to reorder loaded links Ref: https://github.com/filecoin-project/boost/issues/673#issuecomment-1202312003 Ideally we can remove go-merkledag entirely from here because most (all?) of the deal-making infrastructure uses go-codec-dagpb + go-ipld-prime traversals and post-decode link-reordering is a subtle difference that impacts CAR ordering for non-spec-compliant DAG-PB blocks. * test: add test for CarOffsetWriter dag traverse order --- car/car_offset_writer.go | 13 +++++--- car/car_offset_writer_test.go | 56 +++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/car/car_offset_writer.go b/car/car_offset_writer.go index ef2271885..ac9b66c6e 100644 --- a/car/car_offset_writer.go +++ b/car/car_offset_writer.go @@ -106,8 +106,13 @@ func (s *CarOffsetWriter) writeBlocks(ctx context.Context, w io.Writer, headerSi return nil, fmt.Errorf("getting block %s: %w", c, err) } + // take a copy of the links array before nd.RawData() triggers a sort + links := make([]*format.Link, len(nd.Links())) + copy(links, nd.Links()) + byts := nd.RawData() + // Get the size of the block and metadata - ldsize := util.LdSize(nd.Cid().Bytes(), nd.RawData()) + ldsize := util.LdSize(nd.Cid().Bytes(), byts) // Check if the offset from which to start writing is in or before this // block @@ -121,7 +126,7 @@ func (s *CarOffsetWriter) writeBlocks(ctx context.Context, w io.Writer, headerSi // Write the block data to the writer, starting at the block offset _, err = skipWrite(w, blockWriteOffset, func(sw io.Writer) (int, error) { - return 0, util.LdWrite(sw, nd.Cid().Bytes(), nd.RawData()) + return 0, util.LdWrite(sw, nd.Cid().Bytes(), byts) }) if err != nil { return nil, fmt.Errorf("writing CAR block %s: %w", c, err) @@ -132,13 +137,13 @@ func (s *CarOffsetWriter) writeBlocks(ctx context.Context, w io.Writer, headerSi s.blockInfos.Put(nd.Cid(), &BlockInfo{ offset: offset, size: ldsize, - links: nd.Links(), + links: links, }) offset = nextBlockOffset // Return any links from this block to other DAG blocks - return nd.Links(), nil + return links, nil } seen := cid.NewSet() diff --git a/car/car_offset_writer_test.go b/car/car_offset_writer_test.go index 2effd13b3..0ad70fa20 100644 --- a/car/car_offset_writer_test.go +++ b/car/car_offset_writer_test.go @@ -7,6 +7,7 @@ import ( "math/rand" "testing" + blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-blockservice" "github.com/ipfs/go-cidutil" "github.com/ipfs/go-datastore" @@ -15,6 +16,7 @@ import ( chunk "github.com/ipfs/go-ipfs-chunker" format "github.com/ipfs/go-ipld-format" "github.com/ipfs/go-merkledag" + merkledagpb "github.com/ipfs/go-merkledag/pb" "github.com/ipfs/go-unixfs/importer/balanced" "github.com/ipfs/go-unixfs/importer/helpers" "github.com/ipld/go-car" @@ -22,6 +24,60 @@ import ( "github.com/stretchr/testify/require" ) +func TestCarOffsetWriterDagOrder(t *testing.T) { + ctx := context.Background() + ds := dss.MutexWrap(datastore.NewMapDatastore()) + bs := bstore.NewBlockstore(ds) + bserv := blockservice.New(bs, nil) + + // make a 3 block DAG, the links in the root aren't in DAG-PB spec sort order, + // they should be byte-order sorted by `Name` field, but we switch them here + // so we can test that traversal happens based on the links as they appear in + // the encoded bytes, which is what we get with go-codec-dagpb + go-ipld-prime + aaaa := "aaaa" + aaaaBlk := merkledag.NewRawNode([]byte(aaaa)) + require.NoError(t, bserv.AddBlock(ctx, aaaaBlk)) + bbbb := "bbbb" + bbbbBlk := merkledag.NewRawNode([]byte(bbbb)) + require.NoError(t, bserv.AddBlock(ctx, bbbbBlk)) + pbn := &merkledagpb.PBNode{ + Links: []*merkledagpb.PBLink{ + {Hash: bbbbBlk.Cid().Bytes(), Name: &bbbb}, + {Hash: aaaaBlk.Cid().Bytes(), Name: &aaaa}, + }, + } + rootByts, err := pbn.Marshal() + require.NoError(t, err) + rootBlk := blocks.NewBlock(rootByts) + require.NoError(t, bserv.AddBlock(ctx, rootBlk)) + + // execute + fullCarCow := NewCarOffsetWriter(rootBlk.Cid(), bs, NewBlockInfoCache()) + var fullBuff bytes.Buffer + require.NoError(t, fullCarCow.Write(context.Background(), &fullBuff, 0)) + + // decode result + reader, err := car.NewCarReader(bytes.NewReader(fullBuff.Bytes())) + require.NoError(t, err) + + // header + require.Equal(t, 1, len(reader.Header.Roots)) + require.Equal(t, rootBlk.Cid(), reader.Header.Roots[0]) + blk, err := reader.Next() + require.NoError(t, err) + + // 3 blocks, in the correct, unsorted order - root, bbbb, aaaa + require.Equal(t, blk.Cid(), rootBlk.Cid()) + blk, err = reader.Next() + require.NoError(t, err) + require.Equal(t, blk.Cid(), bbbbBlk.Cid()) + blk, err = reader.Next() + require.NoError(t, err) + require.Equal(t, blk.Cid(), aaaaBlk.Cid()) + _, err = reader.Next() + require.Error(t, err) +} + func TestCarOffsetWriter(t *testing.T) { ds := dss.MutexWrap(datastore.NewMapDatastore()) bs := bstore.NewBlockstore(ds)