From e5532fe4f9dba73d3762eab87fd749614c8dfb1d Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 16 Aug 2022 22:03:28 +1000 Subject: [PATCH] fix!: keep deserialised state stable until explicit mutation When decoding a badly serialised block with Links out of order, don't sort the list until we receive an explicit mutation operation. This ensures stable DAG traversal ordering based on the links as they appear in the serialised form and removes surprise-sorting when performing certain operations that wouldn't be expected to mutate. The pre-v0.4.0 behaviour was to always sort, but this behaviour wasn't baked in to the dag-pb spec and wasn't built into go-codec-dagpb which now forms the backend of ProtoNode, although remnants of sorting remain in some operations. Almost all CAR-from-DAG creation code in Go uses go-codec-dagpb and go-ipld-prime's traversal engine. However this can result in a different order when encountering badly encoded blocks (unsorted Links) where certain intermediate operations are performed on the ProtoNode prior to obtaining the Links() list (Links() itself doesn't sort, but e.g. RawData() does). The included "TestLinkSorting/decode" test is the only case that passes without this patch. Ref: https://github.com/ipld/ipld/pull/233 Ref: https://github.com/filecoin-project/boost/issues/673 Ref: https://github.com/filecoin-project/boost/pull/675 This commit was moved from ipfs/go-merkledag@48c72029ffaeed0c3f864af03bc0906d1d5fcb2c --- ipld/merkledag/coding.go | 33 ++++- ipld/merkledag/merkledag_test.go | 220 +++++++++++++++++++++++++++++++ ipld/merkledag/node.go | 99 +++++++++++--- 3 files changed, 328 insertions(+), 24 deletions(-) diff --git a/ipld/merkledag/coding.go b/ipld/merkledag/coding.go index bdc1dcde2..0a71b0ac5 100644 --- a/ipld/merkledag/coding.go +++ b/ipld/merkledag/coding.go @@ -23,6 +23,14 @@ const _ = pb.DoNotUpgradeFileEverItWillChangeYourHashes // for now, we use a PBNode intermediate thing. // because native go objects are nice. +// pbLinkSlice is a slice of pb.PBLink, similar to LinkSlice but for sorting the +// PB form +type pbLinkSlice []*pb.PBLink + +func (pbls pbLinkSlice) Len() int { return len(pbls) } +func (pbls pbLinkSlice) Swap(a, b int) { pbls[a], pbls[b] = pbls[b], pbls[a] } +func (pbls pbLinkSlice) Less(a, b int) bool { return *pbls[a].Name < *pbls[b].Name } + // unmarshal decodes raw data into a *Node instance. // The conversion uses an intermediate PBNode. func unmarshal(encodedBytes []byte) (*ProtoNode, error) { @@ -41,6 +49,9 @@ func fromImmutableNode(encoded *immutableProtoNode) *ProtoNode { n.data = n.encoded.PBNode.Data.Must().Bytes() } numLinks := n.encoded.PBNode.Links.Length() + // links may not be sorted after deserialization, but we don't change + // them until we mutate this node since we're representing the current, + // as-serialized state n.links = make([]*format.Link, numLinks) linkAllocs := make([]format.Link, numLinks) for i := int64(0); i < numLinks; i++ { @@ -60,12 +71,15 @@ func fromImmutableNode(encoded *immutableProtoNode) *ProtoNode { link.Cid = c n.links[i] = link } + // we don't set n.linksDirty because the order of the links list from + // serialized form needs to be stable, until we start mutating the ProtoNode return n } func (n *ProtoNode) marshalImmutable() (*immutableProtoNode, error) { + links := n.Links() nd, err := qp.BuildMap(dagpb.Type.PBNode, 2, func(ma ipld.MapAssembler) { - qp.MapEntry(ma, "Links", qp.List(int64(len(n.links)), func(la ipld.ListAssembler) { - for _, link := range n.links { + qp.MapEntry(ma, "Links", qp.List(int64(len(links)), func(la ipld.ListAssembler) { + for _, link := range links { qp.ListEntry(la, qp.Map(3, func(ma ipld.MapAssembler) { if link.Cid.Defined() { qp.MapEntry(ma, "Hash", qp.Link(cidlink.Link{Cid: link.Cid})) @@ -113,7 +127,6 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode { pbn.Links = make([]*pb.PBLink, len(n.links)) } - sort.Stable(LinkSlice(n.links)) // keep links sorted for i, l := range n.links { pbn.Links[i] = &pb.PBLink{} pbn.Links[i].Name = &l.Name @@ -123,6 +136,11 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode { } } + // Ensure links are sorted prior to encode, regardless of `linksDirty`. They + // may not have come sorted if we deserialized a badly encoded form that + // didn't have links already sorted. + sort.Stable(pbLinkSlice(pbn.Links)) + if len(n.data) > 0 { pbn.Data = n.data } @@ -132,8 +150,13 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode { // EncodeProtobuf returns the encoded raw data version of a Node instance. // It may use a cached encoded version, unless the force flag is given. func (n *ProtoNode) EncodeProtobuf(force bool) ([]byte, error) { - sort.Stable(LinkSlice(n.links)) // keep links sorted - if n.encoded == nil || force { + if n.encoded == nil || n.linksDirty || force { + if n.linksDirty { + // there was a mutation involving links, make sure we sort before we build + // and cache a `Node` form that captures the current state + sort.Stable(LinkSlice(n.links)) + n.linksDirty = false + } n.cached = cid.Undef var err error n.encoded, err = n.marshalImmutable() diff --git a/ipld/merkledag/merkledag_test.go b/ipld/merkledag/merkledag_test.go index 17a05c6a4..a9a0f9142 100644 --- a/ipld/merkledag/merkledag_test.go +++ b/ipld/merkledag/merkledag_test.go @@ -25,6 +25,7 @@ import ( offline "github.com/ipfs/go-ipfs-exchange-offline" u "github.com/ipfs/go-ipfs-util" ipld "github.com/ipfs/go-ipld-format" + prime "github.com/ipld/go-ipld-prime" ) // makeDepthTestingGraph makes a small DAG with two levels. The level-two @@ -745,6 +746,225 @@ func TestEnumerateAsyncFailsNotFound(t *testing.T) { } } +func TestLinkSorting(t *testing.T) { + az := "az" + aaaa := "aaaa" + bbbb := "bbbb" + cccc := "cccc" + + azBlk := NewRawNode([]byte(az)) + aaaaBlk := NewRawNode([]byte(aaaa)) + bbbbBlk := NewRawNode([]byte(bbbb)) + ccccBlk := NewRawNode([]byte(cccc)) + pbn := &mdpb.PBNode{ + Links: []*mdpb.PBLink{ + {Hash: bbbbBlk.Cid().Bytes(), Name: &bbbb}, + {Hash: azBlk.Cid().Bytes(), Name: &az}, + {Hash: aaaaBlk.Cid().Bytes(), Name: &aaaa}, + {Hash: ccccBlk.Cid().Bytes(), Name: &cccc}, + }, + } + byts, err := pbn.Marshal() + if err != nil { + t.Fatal(err) + } + + mustLookupNodeString := func(t *testing.T, node prime.Node, name string) prime.Node { + subNode, err := node.LookupByString(name) + if err != nil { + t.Fatal(err) + } + return subNode + } + + mustLookupNodeIndex := func(t *testing.T, node prime.Node, idx int64) prime.Node { + subNode, err := node.LookupByIndex(idx) + if err != nil { + t.Fatal(err) + } + return subNode + } + + mustNodeAsString := func(t *testing.T, node prime.Node) string { + str, err := node.AsString() + if err != nil { + t.Fatal(err) + } + return str + } + + verifyUnsortedNode := func(t *testing.T, node *ProtoNode) { + links := node.Links() + if len(links) != 4 { + t.Errorf("wrong number of links, expected 4 but got %d", len(links)) + } + if links[0].Name != bbbb { + t.Errorf("expected link 0 to be 'bbbb', got %s", links[0].Name) + } + if links[1].Name != az { + t.Errorf("expected link 0 to be 'az', got %s", links[1].Name) + } + if links[2].Name != aaaa { + t.Errorf("expected link 0 to be 'aaaa', got %s", links[2].Name) + } + if links[3].Name != cccc { + t.Errorf("expected link 0 to be 'cccc', got %s", links[3].Name) + } + + // check the go-ipld-prime form + linksNode := mustLookupNodeString(t, node, "Links") + if linksNode.Length() != 4 { + t.Errorf("(Node) wrong number of links, expected 4 but got %d", len(links)) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 0), "Name")); name != bbbb { + t.Errorf("(Node) expected link 0 to be 'bbbb', got %s", name) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 1), "Name")); name != az { + t.Errorf("(Node) expected link 0 to be 'az', got %s", name) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 2), "Name")); name != aaaa { + t.Errorf("(Node) expected link 0 to be 'aaaa', got %s", name) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 3), "Name")); name != cccc { + t.Errorf("(Node) expected link 0 to be 'cccc', got %s", name) + } + } + + verifySortedNode := func(t *testing.T, node *ProtoNode) { + links := node.Links() + if len(links) != 4 { + t.Errorf("wrong number of links, expected 4 but got %d", len(links)) + } + if links[0].Name != aaaa { + t.Errorf("expected link 0 to be 'aaaa', got %s", links[0].Name) + } + if links[1].Name != az { + t.Errorf("expected link 0 to be 'az', got %s", links[1].Name) + } + if links[2].Name != bbbb { + t.Errorf("expected link 0 to be 'bbbb', got %s", links[2].Name) + } + if links[3].Name != cccc { + t.Errorf("expected link 0 to be 'cccc', got %s", links[3].Name) + } + + // check the go-ipld-prime form + linksNode := mustLookupNodeString(t, node, "Links") + if linksNode.Length() != 4 { + t.Errorf("(Node) wrong number of links, expected 4 but got %d", len(links)) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 0), "Name")); name != aaaa { + t.Errorf("(Node) expected link 0 to be 'aaaa', got %s", name) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 1), "Name")); name != az { + t.Errorf("(Node) expected link 0 to be 'az', got %s", name) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 2), "Name")); name != bbbb { + t.Errorf("(Node) expected link 0 to be 'bbbb', got %s", name) + } + if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 3), "Name")); name != cccc { + t.Errorf("(Node) expected link 0 to be 'cccc', got %s", name) + } + } + + t.Run("decode", func(t *testing.T) { + node, err := DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + verifyUnsortedNode(t, node) + }) + + t.Run("RawData() should not mutate, should return original form", func(t *testing.T) { + node, err := DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + rawData := node.RawData() + verifyUnsortedNode(t, node) + if !bytes.Equal(rawData, byts) { + t.Error("RawData() did not return original bytes") + } + }) + + t.Run("Size() should not mutate", func(t *testing.T) { + node, err := DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + sz, err := node.Size() + if err != nil { + t.Fatal(err) + } + if sz != 182 { + t.Errorf("expected size to be 182, got %d", sz) + } + verifyUnsortedNode(t, node) + }) + + t.Run("GetPBNode() should not mutate, returned PBNode should be sorted", func(t *testing.T) { + node, err := DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + rtPBNode := node.GetPBNode() + rtByts, err := rtPBNode.Marshal() + if err != nil { + t.Fatal(err) + } + verifyUnsortedNode(t, node) + rtNode, err := DecodeProtobuf(rtByts) + if err != nil { + t.Fatal(err) + } + verifySortedNode(t, rtNode) + }) + + t.Run("add and remove link should mutate", func(t *testing.T) { + node, err := DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + someCid, _ := cid.Cast([]byte{1, 85, 0, 5, 0, 1, 2, 3, 4}) + if err = node.AddRawLink("foo", &ipld.Link{ + Size: 10, + Cid: someCid, + }); err != nil { + t.Fatal(err) + } + if err = node.RemoveNodeLink("foo"); err != nil { + t.Fatal(err) + } + verifySortedNode(t, node) + }) + + t.Run("update link should not mutate, returned ProtoNode should be sorted", func(t *testing.T) { + node, err := DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + newNode, err := node.UpdateNodeLink("self", node) + if err != nil { + t.Fatal(err) + } + if err = newNode.RemoveNodeLink("self"); err != nil { + t.Fatal(err) + } + verifySortedNode(t, newNode) + verifyUnsortedNode(t, node) + }) + + t.Run("SetLinks() should mutate", func(t *testing.T) { + node, err := DecodeProtobuf(byts) + if err != nil { + t.Fatal(err) + } + links := node.Links() // clone + node.SetLinks(links) + verifySortedNode(t, node) + }) +} + func TestProgressIndicator(t *testing.T) { testProgressIndicator(t, 5) } diff --git a/ipld/merkledag/node.go b/ipld/merkledag/node.go index cafd9c39c..a3a719769 100644 --- a/ipld/merkledag/node.go +++ b/ipld/merkledag/node.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "sort" blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" @@ -41,10 +42,12 @@ type immutableProtoNode struct { // this mutable protonode implementation is needed to support go-unixfs, // the only library that implements both read and write for UnixFS v1. type ProtoNode struct { - links []*format.Link - data []byte + links []*format.Link + linksDirty bool + data []byte - // cache encoded/marshaled value + // cache encoded/marshaled value, kept to make the go-ipld-prime Node interface + // work (see prime.go), and to provide a cached []byte encoded form available encoded *immutableProtoNode cached cid.Cid @@ -115,7 +118,15 @@ func NodeWithData(d []byte) *ProtoNode { return &ProtoNode{data: d} } -// AddNodeLink adds a link to another node. +// AddNodeLink adds a link to another node. The link will be added in +// sorted order. +// +// If sorting has not already been applied to this node (because +// it was deserialized from a form that did not have sorted links), the links +// list will be sorted. If a ProtoNode was deserialized from a badly encoded +// form that did not already have its links sorted, calling AddNodeLink and then +// RemoveNodeLink for the same link, will not result in an identically encoded +// form as the links will have been sorted. func (n *ProtoNode) AddNodeLink(name string, that format.Node) error { lnk, err := format.MakeLink(that) if err != nil { @@ -129,22 +140,30 @@ func (n *ProtoNode) AddNodeLink(name string, that format.Node) error { return nil } -// AddRawLink adds a copy of a link to this node +// AddRawLink adds a copy of a link to this node. The link will be added in +// sorted order. +// +// If sorting has not already been applied to this node (because +// it was deserialized from a form that did not have sorted links), the links +// list will be sorted. If a ProtoNode was deserialized from a badly encoded +// form that did not already have its links sorted, calling AddRawLink and then +// RemoveNodeLink for the same link, will not result in an identically encoded +// form as the links will have been sorted. func (n *ProtoNode) AddRawLink(name string, l *format.Link) error { - n.encoded = nil n.links = append(n.links, &format.Link{ Name: name, Size: l.Size, Cid: l.Cid, }) - + n.linksDirty = true // needs a sort + n.encoded = nil return nil } -// RemoveNodeLink removes a link on this node by the given name. +// RemoveNodeLink removes a link on this node by the given name. If there are +// no links with this name, ErrLinkNotFound will be returned. If there are more +// than one link with this name, they will all be removed. func (n *ProtoNode) RemoveNodeLink(name string) error { - n.encoded = nil - ref := n.links[:0] found := false @@ -161,6 +180,11 @@ func (n *ProtoNode) RemoveNodeLink(name string) error { } n.links = ref + // Even though a removal won't change sorting, this node may have come from + // a deserialized state with badly sorted links. Now that we are mutating, + // we need to ensure the resulting link list is sorted when it gets consumed. + n.linksDirty = true + n.encoded = nil return nil } @@ -204,8 +228,10 @@ func (n *ProtoNode) GetLinkedNode(ctx context.Context, ds format.DAGService, nam return lnk.GetNode(ctx, ds) } -// Copy returns a copy of the node. -// NOTE: Does not make copies of Node objects in the links. +// Copy returns a copy of the node. The resulting node will have a properly +// sorted Links list regardless of whether the original came from a badly +// serialized form that didn't have a sorted list. +// NOTE: This does not make copies of Node objects in the links. func (n *ProtoNode) Copy() format.Node { nnode := new(ProtoNode) if len(n.data) > 0 { @@ -214,8 +240,11 @@ func (n *ProtoNode) Copy() format.Node { } if len(n.links) > 0 { - nnode.links = make([]*format.Link, len(n.links)) - copy(nnode.links, n.links) + nnode.links = append([]*format.Link(nil), n.links...) + // Sort links regardless of linksDirty state, this may have come from a + // serialized form that had badly sorted links, in which case linksDirty + // will not be true. + sort.Stable(LinkSlice(nnode.links)) } nnode.builder = n.builder @@ -244,7 +273,12 @@ func (n *ProtoNode) SetData(d []byte) { } // UpdateNodeLink return a copy of the node with the link name set to point to -// that. If a link of the same name existed, it is removed. +// that. The link will be added in sorted order. If a link of the same name +// existed, it is removed. +// +// If sorting has not already been applied to this node (because +// it was deserialized from a form that did not have sorted links), the links +// list will be sorted in the returned copy. func (n *ProtoNode) UpdateNodeLink(name string, that *ProtoNode) (*ProtoNode, error) { newnode := n.Copy().(*ProtoNode) _ = newnode.RemoveNodeLink(name) // ignore error @@ -309,12 +343,23 @@ func (n *ProtoNode) UnmarshalJSON(b []byte) error { } n.data = s.Data + // Links may not be sorted after deserialization, but we don't change + // them until we mutate this node since we're representing the current, + // as-serialized state. So n.linksDirty is not set here. n.links = s.Links + n.encoded = nil return nil } // MarshalJSON returns a JSON representation of the node. func (n *ProtoNode) MarshalJSON() ([]byte, error) { + if n.linksDirty { + // there was a mutation involving links, make sure we sort + sort.Stable(LinkSlice(n.links)) + n.linksDirty = false + n.encoded = nil + } + out := map[string]interface{}{ "data": n.data, "links": n.links, @@ -358,14 +403,23 @@ func (n *ProtoNode) Multihash() mh.Multihash { return n.cached.Hash() } -// Links returns the node links. +// Links returns a copy of the node's links. func (n *ProtoNode) Links() []*format.Link { - return n.links + if n.linksDirty { + // there was a mutation involving links, make sure we sort + sort.Stable(LinkSlice(n.links)) + n.linksDirty = false + n.encoded = nil + } + return append([]*format.Link(nil), n.links...) } -// SetLinks replaces the node links with the given ones. +// SetLinks replaces the node links with a copy of the provided links. Sorting +// will be applied to the list. func (n *ProtoNode) SetLinks(links []*format.Link) { - n.links = links + n.links = append([]*format.Link(nil), links...) + n.linksDirty = true // needs a sort + n.encoded = nil } // Resolve is an alias for ResolveLink. @@ -397,6 +451,13 @@ func (n *ProtoNode) Tree(p string, depth int) []string { return nil } + if n.linksDirty { + // there was a mutation involving links, make sure we sort + sort.Stable(LinkSlice(n.links)) + n.linksDirty = false + n.encoded = nil + } + out := make([]string, 0, len(n.links)) for _, lnk := range n.links { out = append(out, lnk.Name)