Skip to content

Commit

Permalink
fix!: keep deserialised state stable until explicit mutation
Browse files Browse the repository at this point in the history
When decoding a badly serialised block with Links out of order, don't sort
the list until we receive an explicit mutation operation. This ensures stable
DAG traversal ordering based on the links as they appear in the serialised
form and removes surprise-sorting when performing certain operations that
wouldn't be expected to mutate.

The pre-v0.4.0 behaviour was to always sort, but this behaviour wasn't baked
in to the dag-pb spec and wasn't built into go-codec-dagpb which now forms the
backend of ProtoNode, although remnants of sorting remain in some operations.
Almost all CAR-from-DAG creation code in Go uses go-codec-dagpb and
go-ipld-prime's traversal engine. However this can result in a different order
when encountering badly encoded blocks (unsorted Links) where certain
intermediate operations are performed on the ProtoNode prior to obtaining the
Links() list (Links() itself doesn't sort, but e.g. RawData() does).

The included "TestLinkSorting/decode" test is the only case that passes without
this patch.

Ref: ipld/ipld#233
Ref: filecoin-project/boost#673
Ref: filecoin-project/boost#675
  • Loading branch information
rvagg committed Aug 22, 2022
1 parent 8b2b396 commit 0a25b5f
Show file tree
Hide file tree
Showing 3 changed files with 328 additions and 24 deletions.
33 changes: 28 additions & 5 deletions coding.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ const _ = pb.DoNotUpgradeFileEverItWillChangeYourHashes
// for now, we use a PBNode intermediate thing.
// because native go objects are nice.

// pbLinkSlice is a slice of pb.PBLink, similar to LinkSlice but for sorting the
// PB form
type pbLinkSlice []*pb.PBLink

func (pbls pbLinkSlice) Len() int { return len(pbls) }
func (pbls pbLinkSlice) Swap(a, b int) { pbls[a], pbls[b] = pbls[b], pbls[a] }
func (pbls pbLinkSlice) Less(a, b int) bool { return *pbls[a].Name < *pbls[b].Name }

// unmarshal decodes raw data into a *Node instance.
// The conversion uses an intermediate PBNode.
func unmarshal(encodedBytes []byte) (*ProtoNode, error) {
Expand All @@ -41,6 +49,9 @@ func fromImmutableNode(encoded *immutableProtoNode) *ProtoNode {
n.data = n.encoded.PBNode.Data.Must().Bytes()
}
numLinks := n.encoded.PBNode.Links.Length()
// links may not be sorted after deserialization, but we don't change
// them until we mutate this node since we're representing the current,
// as-serialized state
n.links = make([]*format.Link, numLinks)
linkAllocs := make([]format.Link, numLinks)
for i := int64(0); i < numLinks; i++ {
Expand All @@ -60,12 +71,15 @@ func fromImmutableNode(encoded *immutableProtoNode) *ProtoNode {
link.Cid = c
n.links[i] = link
}
// we don't set n.linksDirty because the order of the links list from
// serialized form needs to be stable, until we start mutating the ProtoNode
return n
}
func (n *ProtoNode) marshalImmutable() (*immutableProtoNode, error) {
links := n.Links()
nd, err := qp.BuildMap(dagpb.Type.PBNode, 2, func(ma ipld.MapAssembler) {
qp.MapEntry(ma, "Links", qp.List(int64(len(n.links)), func(la ipld.ListAssembler) {
for _, link := range n.links {
qp.MapEntry(ma, "Links", qp.List(int64(len(links)), func(la ipld.ListAssembler) {
for _, link := range links {
qp.ListEntry(la, qp.Map(3, func(ma ipld.MapAssembler) {
if link.Cid.Defined() {
qp.MapEntry(ma, "Hash", qp.Link(cidlink.Link{Cid: link.Cid}))
Expand Down Expand Up @@ -113,7 +127,6 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode {
pbn.Links = make([]*pb.PBLink, len(n.links))
}

sort.Stable(LinkSlice(n.links)) // keep links sorted
for i, l := range n.links {
pbn.Links[i] = &pb.PBLink{}
pbn.Links[i].Name = &l.Name
Expand All @@ -123,6 +136,11 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode {
}
}

// Ensure links are sorted prior to encode, regardless of `linksDirty`. They
// may not have come sorted if we deserialized a badly encoded form that
// didn't have links already sorted.
sort.Stable(pbLinkSlice(pbn.Links))

if len(n.data) > 0 {
pbn.Data = n.data
}
Expand All @@ -132,8 +150,13 @@ func (n *ProtoNode) GetPBNode() *pb.PBNode {
// EncodeProtobuf returns the encoded raw data version of a Node instance.
// It may use a cached encoded version, unless the force flag is given.
func (n *ProtoNode) EncodeProtobuf(force bool) ([]byte, error) {
sort.Stable(LinkSlice(n.links)) // keep links sorted
if n.encoded == nil || force {
if n.encoded == nil || n.linksDirty || force {
if n.linksDirty {
// there was a mutation involving links, make sure we sort before we build
// and cache a `Node` form that captures the current state
sort.Stable(LinkSlice(n.links))
n.linksDirty = false
}
n.cached = cid.Undef
var err error
n.encoded, err = n.marshalImmutable()
Expand Down
220 changes: 220 additions & 0 deletions merkledag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
offline "github.com/ipfs/go-ipfs-exchange-offline"
u "github.com/ipfs/go-ipfs-util"
ipld "github.com/ipfs/go-ipld-format"
prime "github.com/ipld/go-ipld-prime"
)

// makeDepthTestingGraph makes a small DAG with two levels. The level-two
Expand Down Expand Up @@ -745,6 +746,225 @@ func TestEnumerateAsyncFailsNotFound(t *testing.T) {
}
}

func TestLinkSorting(t *testing.T) {
az := "az"
aaaa := "aaaa"
bbbb := "bbbb"
cccc := "cccc"

azBlk := NewRawNode([]byte(az))
aaaaBlk := NewRawNode([]byte(aaaa))
bbbbBlk := NewRawNode([]byte(bbbb))
ccccBlk := NewRawNode([]byte(cccc))
pbn := &mdpb.PBNode{
Links: []*mdpb.PBLink{
{Hash: bbbbBlk.Cid().Bytes(), Name: &bbbb},
{Hash: azBlk.Cid().Bytes(), Name: &az},
{Hash: aaaaBlk.Cid().Bytes(), Name: &aaaa},
{Hash: ccccBlk.Cid().Bytes(), Name: &cccc},
},
}
byts, err := pbn.Marshal()
if err != nil {
t.Fatal(err)
}

mustLookupNodeString := func(t *testing.T, node prime.Node, name string) prime.Node {
subNode, err := node.LookupByString(name)
if err != nil {
t.Fatal(err)
}
return subNode
}

mustLookupNodeIndex := func(t *testing.T, node prime.Node, idx int64) prime.Node {
subNode, err := node.LookupByIndex(idx)
if err != nil {
t.Fatal(err)
}
return subNode
}

mustNodeAsString := func(t *testing.T, node prime.Node) string {
str, err := node.AsString()
if err != nil {
t.Fatal(err)
}
return str
}

verifyUnsortedNode := func(t *testing.T, node *ProtoNode) {
links := node.Links()
if len(links) != 4 {
t.Errorf("wrong number of links, expected 4 but got %d", len(links))
}
if links[0].Name != bbbb {
t.Errorf("expected link 0 to be 'bbbb', got %s", links[0].Name)
}
if links[1].Name != az {
t.Errorf("expected link 0 to be 'az', got %s", links[1].Name)
}
if links[2].Name != aaaa {
t.Errorf("expected link 0 to be 'aaaa', got %s", links[2].Name)
}
if links[3].Name != cccc {
t.Errorf("expected link 0 to be 'cccc', got %s", links[3].Name)
}

// check the go-ipld-prime form
linksNode := mustLookupNodeString(t, node, "Links")
if linksNode.Length() != 4 {
t.Errorf("(Node) wrong number of links, expected 4 but got %d", len(links))
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 0), "Name")); name != bbbb {
t.Errorf("(Node) expected link 0 to be 'bbbb', got %s", name)
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 1), "Name")); name != az {
t.Errorf("(Node) expected link 0 to be 'az', got %s", name)
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 2), "Name")); name != aaaa {
t.Errorf("(Node) expected link 0 to be 'aaaa', got %s", name)
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 3), "Name")); name != cccc {
t.Errorf("(Node) expected link 0 to be 'cccc', got %s", name)
}
}

verifySortedNode := func(t *testing.T, node *ProtoNode) {
links := node.Links()
if len(links) != 4 {
t.Errorf("wrong number of links, expected 4 but got %d", len(links))
}
if links[0].Name != aaaa {
t.Errorf("expected link 0 to be 'aaaa', got %s", links[0].Name)
}
if links[1].Name != az {
t.Errorf("expected link 0 to be 'az', got %s", links[1].Name)
}
if links[2].Name != bbbb {
t.Errorf("expected link 0 to be 'bbbb', got %s", links[2].Name)
}
if links[3].Name != cccc {
t.Errorf("expected link 0 to be 'cccc', got %s", links[3].Name)
}

// check the go-ipld-prime form
linksNode := mustLookupNodeString(t, node, "Links")
if linksNode.Length() != 4 {
t.Errorf("(Node) wrong number of links, expected 4 but got %d", len(links))
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 0), "Name")); name != aaaa {
t.Errorf("(Node) expected link 0 to be 'aaaa', got %s", name)
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 1), "Name")); name != az {
t.Errorf("(Node) expected link 0 to be 'az', got %s", name)
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 2), "Name")); name != bbbb {
t.Errorf("(Node) expected link 0 to be 'bbbb', got %s", name)
}
if name := mustNodeAsString(t, mustLookupNodeString(t, mustLookupNodeIndex(t, linksNode, 3), "Name")); name != cccc {
t.Errorf("(Node) expected link 0 to be 'cccc', got %s", name)
}
}

t.Run("decode", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
verifyUnsortedNode(t, node)
})

t.Run("RawData() should not mutate, should return original form", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
rawData := node.RawData()
verifyUnsortedNode(t, node)
if !bytes.Equal(rawData, byts) {
t.Error("RawData() did not return original bytes")
}
})

t.Run("Size() should not mutate", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
sz, err := node.Size()
if err != nil {
t.Fatal(err)
}
if sz != 182 {
t.Errorf("expected size to be 182, got %d", sz)
}
verifyUnsortedNode(t, node)
})

t.Run("GetPBNode() should not mutate, returned PBNode should be sorted", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
rtPBNode := node.GetPBNode()
rtByts, err := rtPBNode.Marshal()
if err != nil {
t.Fatal(err)
}
verifyUnsortedNode(t, node)
rtNode, err := DecodeProtobuf(rtByts)
if err != nil {
t.Fatal(err)
}
verifySortedNode(t, rtNode)
})

t.Run("add and remove link should mutate", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
someCid, _ := cid.Cast([]byte{1, 85, 0, 5, 0, 1, 2, 3, 4})
if err = node.AddRawLink("foo", &ipld.Link{
Size: 10,
Cid: someCid,
}); err != nil {
t.Fatal(err)
}
if err = node.RemoveNodeLink("foo"); err != nil {
t.Fatal(err)
}
verifySortedNode(t, node)
})

t.Run("update link should not mutate, returned ProtoNode should be sorted", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
newNode, err := node.UpdateNodeLink("self", node)
if err != nil {
t.Fatal(err)
}
if err = newNode.RemoveNodeLink("self"); err != nil {
t.Fatal(err)
}
verifySortedNode(t, newNode)
verifyUnsortedNode(t, node)
})

t.Run("SetLinks() should mutate", func(t *testing.T) {
node, err := DecodeProtobuf(byts)
if err != nil {
t.Fatal(err)
}
links := node.Links() // clone
node.SetLinks(links)
verifySortedNode(t, node)
})
}

func TestProgressIndicator(t *testing.T) {
testProgressIndicator(t, 5)
}
Expand Down
Loading

0 comments on commit 0a25b5f

Please sign in to comment.