Skip to content

Commit

Permalink
fix!: keep deserialised state stable until explicit mutation
Browse files Browse the repository at this point in the history
When decoding a badly serialised block with Links out of order, don't sort
the list until we receive an explicit mutation operation. This ensures stable
DAG traversal ordering based on the links as they appear in the serialised
form and removes surprise-sorting when performing certain operations that
wouldn't be expected to mutate.

The pre-v0.4.0 behaviour was to always sort, but this behaviour wasn't baked
in to the dag-pb spec and wasn't built into go-codec-dagpb which now forms the
backend of ProtoNode, although remnants of sorting remain in some operations.
Almost all CAR-from-DAG creation code in Go uses go-codec-dagpb and
go-ipld-prime's traversal engine. However this can result in a different order
when encountering badly encoded blocks (unsorted Links) where certain
intermediate operations are performed on the ProtoNode prior to obtaining the
Links() list (Links() itself doesn't sort, but e.g. RawData() does).

The included "TestLinkSorting/decode" test is the only case that passes without
this patch.

Ref: ipld/ipld#233
Ref: filecoin-project/boost#673
Ref: filecoin-project/boost#675
  • Loading branch information
rvagg committed Aug 16, 2022
1 parent 1f81f3a commit 8a73123
Show file tree
Hide file tree
Showing 3 changed files with 219 additions and 13 deletions.
24 changes: 20 additions & 4 deletions coding.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ const _ = pb.DoNotUpgradeFileEverItWillChangeYourHashes
// for now, we use a PBNode intermediate thing.
// because native go objects are nice.

// pbLinkSlice is a slice of pb.PBLink, similar to LinkSlice but for sorting the
// PB form
type pbLinkSlice []*pb.PBLink

func (pbls pbLinkSlice) Len() int { return len(pbls) }
func (pbls pbLinkSlice) Swap(a, b int) { pbls[a], pbls[b] = pbls[b], pbls[a] }
func (pbls pbLinkSlice) Less(a, b int) bool { return *pbls[a].Name < *pbls[b].Name }

// unmarshal decodes raw data into a *Node instance.
// The conversion uses an intermediate PBNode.
func unmarshal(encodedBytes []byte) (*ProtoNode, error) {
Expand All @@ -41,6 +49,9 @@ func fromImmutableNode(encoded *immutableProtoNode) *ProtoNode {
n.data = n.encoded.PBNode.Data.Must().Bytes()
}
numLinks := n.encoded.PBNode.Links.Length()
// links may not be sorted after deserialization, but we don't change
// them until we mutate this node since we're representing the current,
// as-serialized state
n.links = make([]*format.Link, numLinks)
linkAllocs := make([]format.Link, numLinks)
for i := int64(0); i < numLinks; i++ {
Expand All @@ -63,9 +74,12 @@ func fromImmutableNode(encoded *immutableProtoNode) *ProtoNode {
return n
}
func (n *ProtoNode) marshalImmutable() (*immutableProtoNode, error) {
// ensure links are sorted, but don't modify existing link slice
links := n.Links()
sort.Stable(LinkSlice(links))
nd, err := qp.BuildMap(dagpb.Type.PBNode, 2, func(ma ipld.MapAssembler) {
qp.MapEntry(ma, "Links", qp.List(int64(len(n.links)), func(la ipld.ListAssembler) {
for _, link := range n.links {
qp.MapEntry(ma, "Links", qp.List(int64(len(links)), func(la ipld.ListAssembler) {
for _, link := range links {
qp.ListEntry(la, qp.Map(3, func(ma ipld.MapAssembler) {
if link.Cid.Defined() {
qp.MapEntry(ma, "Hash", qp.Link(cidlink.Link{Cid: link.Cid}))
Expand Down Expand Up @@ -113,7 +127,6 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode {
pbn.Links = make([]*pb.PBLink, len(n.links))
}

sort.Stable(LinkSlice(n.links)) // keep links sorted
for i, l := range n.links {
pbn.Links[i] = &pb.PBLink{}
pbn.Links[i].Name = &l.Name
Expand All @@ -123,6 +136,10 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode {
}
}

// Ensure links are sorted prior to encode. They may not have come sorted if
// we deserialized a badly encoded form that didn't have links already sorted.
sort.Stable(pbLinkSlice(pbn.Links))

if len(n.data) > 0 {
pbn.Data = n.data
}
Expand All @@ -132,7 +149,6 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode {
// EncodeProtobuf returns the encoded raw data version of a Node instance.
// It may use a cached encoded version, unless the force flag is given.
func (n *ProtoNode) EncodeProtobuf(force bool) ([]byte, error) {
sort.Stable(LinkSlice(n.links)) // keep links sorted
if n.encoded == nil || force {
n.cached = cid.Undef
var err error
Expand Down
158 changes: 158 additions & 0 deletions merkledag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,164 @@ func TestEnumerateAsyncFailsNotFound(t *testing.T) {
}
}

func TestLinkSorting(t *testing.T) {
az := "az"
aaaa := "aaaa"
bbbb := "bbbb"
cccc := "cccc"

azBlk := NewRawNode([]byte(az))
aaaaBlk := NewRawNode([]byte(aaaa))
bbbbBlk := NewRawNode([]byte(bbbb))
ccccBlk := NewRawNode([]byte(cccc))
pbn := &mdpb.PBNode{
Links: []*mdpb.PBLink{
{Hash: bbbbBlk.Cid().Bytes(), Name: &bbbb},
{Hash: azBlk.Cid().Bytes(), Name: &az},
{Hash: aaaaBlk.Cid().Bytes(), Name: &aaaa},
{Hash: ccccBlk.Cid().Bytes(), Name: &cccc},
},
}
byts, err := pbn.Marshal()
if err != nil {
t.Fatal(err)
}

verifyUnsortedNode := func(t *testing.T, node *ProtoNode) {
links := node.Links()
if len(links) != 4 {
t.Errorf("wrong number of links, expected 4 but got %d", len(links))
}
if links[0].Name != bbbb {
t.Errorf("expected link 0 to be 'bbbb', got %s", links[0].Name)
}
if links[1].Name != az {
t.Errorf("expected link 0 to be 'az', got %s", links[1].Name)
}
if links[2].Name != aaaa {
t.Errorf("expected link 0 to be 'aaaa', got %s", links[2].Name)
}
if links[3].Name != cccc {
t.Errorf("expected link 0 to be 'cccc', got %s", links[3].Name)
}
}
verifySortedNode := func(t *testing.T, node *ProtoNode) {
links := node.Links()
if len(links) != 4 {
t.Errorf("wrong number of links, expected 4 but got %d", len(links))
}
if links[0].Name != aaaa {
t.Errorf("expected link 0 to be 'aaaa', got %s", links[0].Name)
}
if links[1].Name != az {
t.Errorf("expected link 0 to be 'az', got %s", links[1].Name)
}
if links[2].Name != bbbb {
t.Errorf("expected link 0 to be 'bbbb', got %s", links[2].Name)
}
if links[3].Name != cccc {
t.Errorf("expected link 0 to be 'cccc', got %s", links[3].Name)
}
}

t.Run("decode", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
verifyUnsortedNode(t, node)
})

t.Run("RawData() should not mutate, should return original form", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
rawData := node.RawData()
verifyUnsortedNode(t, node)
if !bytes.Equal(rawData, byts) {
t.Error("RawData() did not return original bytes")
}
})

t.Run("Size() should not mutate", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
sz, err := node.Size()
if err != nil {
t.Fatal(err)
}
if sz != 182 {
t.Errorf("expected size to be 182, got %d", sz)
}
verifyUnsortedNode(t, node)
})

t.Run("GetPBNode() should not mutate, returned PBNode should be sorted", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
rtPBNode := node.GetPBNode()
rtByts, err := rtPBNode.Marshal()
if err != nil {
t.Fatal(err)
}
verifyUnsortedNode(t, node)
rtNode, err := DecodeProtobuf(rtByts)
if err != nil {
t.Fatal(err)
}
verifySortedNode(t, rtNode)
})

t.Run("add and remove link should mutate", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
someCid, _ := cid.Cast([]byte{1, 85, 0, 5, 0, 1, 2, 3, 4})
if err = node.AddRawLink("foo", &ipld.Link{
Size: 10,
Cid: someCid,
}); err != nil {
t.Fatal(err)
}
if err = node.RemoveNodeLink("foo"); err != nil {
t.Fatal(err)
}
verifySortedNode(t, node)
})

t.Run("update link should not mutate, returned ProtoNode should be sorted", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
newNode, err := node.UpdateNodeLink("self", node)
if err != nil {
t.Fatal(err)
}
if err = newNode.RemoveNodeLink("self"); err != nil {
t.Fatal(err)
}
verifySortedNode(t, newNode)
verifyUnsortedNode(t, node)
})

t.Run("SetLinks() should mutate", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
links := node.Links() // clone
node.SetLinks(links)
verifySortedNode(t, node)
})
}

func TestProgressIndicator(t *testing.T) {
testProgressIndicator(t, 5)
}
Expand Down
50 changes: 41 additions & 9 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"sort"

blocks "github.com/ipfs/go-block-format"
cid "github.com/ipfs/go-cid"
Expand Down Expand Up @@ -115,7 +116,15 @@ func NodeWithData(d []byte) *ProtoNode {
return &ProtoNode{data: d}
}

// AddNodeLink adds a link to another node.
// AddNodeLink adds a link to another node. The link will be added in
// sorted order.
//
// If sorting has not already been applied to this node (because
// it was deserialized from a form that did not have sorted links), the links
// list will be sorted. If a ProtoNode was deserialized from a badly encoded
// form that did not already have its links sorted, calling AddNodeLink and then
// RemoveNodeLink for the same link, will not result in an identically encoded
// form as the links will have been sorted.
func (n *ProtoNode) AddNodeLink(name string, that format.Node) error {
lnk, err := format.MakeLink(that)
if err != nil {
Expand All @@ -129,19 +138,29 @@ func (n *ProtoNode) AddNodeLink(name string, that format.Node) error {
return nil
}

// AddRawLink adds a copy of a link to this node
// AddRawLink adds a copy of a link to this node. The link will be added in
// sorted order.
//
// If sorting has not already been applied to this node (because
// it was deserialized from a form that did not have sorted links), the links
// list will be sorted. If a ProtoNode was deserialized from a badly encoded
// form that did not already have its links sorted, calling AddRawLink and then
// RemoveNodeLink for the same link, will not result in an identically encoded
// form as the links will have been sorted.
func (n *ProtoNode) AddRawLink(name string, l *format.Link) error {
n.encoded = nil
n.links = append(n.links, &format.Link{
Name: name,
Size: l.Size,
Cid: l.Cid,
})

sort.Stable(LinkSlice(n.links))
return nil
}

// RemoveNodeLink removes a link on this node by the given name.
// RemoveNodeLink removes a link on this node by the given name. If there are
// no links with this name, ErrLinkNotFound will be returned. If there are more
// thank one link with this name, they will all be removed.
func (n *ProtoNode) RemoveNodeLink(name string) error {
n.encoded = nil

Expand Down Expand Up @@ -244,7 +263,12 @@ func (n *ProtoNode) SetData(d []byte) {
}

// UpdateNodeLink return a copy of the node with the link name set to point to
// that. If a link of the same name existed, it is removed.
// that. The link will be added in sorted order. If a link of the same name
// existed, it is removed.
//
// If sorting has not already been applied to this node (because
// it was deserialized from a form that did not have sorted links), the links
// list will be sorted in the returned copy.
func (n *ProtoNode) UpdateNodeLink(name string, that *ProtoNode) (*ProtoNode, error) {
newnode := n.Copy().(*ProtoNode)
_ = newnode.RemoveNodeLink(name) // ignore error
Expand Down Expand Up @@ -309,6 +333,9 @@ func (n *ProtoNode) UnmarshalJSON(b []byte) error {
}

n.data = s.Data
// links may not be sorted after deserialization, but we don't change
// them until we mutate this node since we're representing the current,
// as-serialized state
n.links = s.Links
return nil
}
Expand Down Expand Up @@ -358,14 +385,19 @@ func (n *ProtoNode) Multihash() mh.Multihash {
return n.cached.Hash()
}

// Links returns the node links.
// Links returns a copy of the node's links.
func (n *ProtoNode) Links() []*format.Link {
return n.links
links := make([]*format.Link, len(n.links))
copy(links, n.links)
return links
}

// SetLinks replaces the node links with the given ones.
// SetLinks replaces the node links with a copy of the provided links. Sorting
// will be applied to the list.
func (n *ProtoNode) SetLinks(links []*format.Link) {
n.links = links
n.links = make([]*format.Link, len(links))
copy(n.links, links)
sort.Stable(LinkSlice(n.links))
}

// Resolve is an alias for ResolveLink.
Expand Down

0 comments on commit 8a73123

Please sign in to comment.