From 98ae09ae6d4c2974a1a13c5ee1d4d00294a3cfc1 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 5 Dec 2022 11:10:30 +0000 Subject: [PATCH] Fix hash mismatch error on matching link pointer On load, the default IPLD Link System checks the hash of loaded data against the given hash to assure that they match. The actual hash is calculated using the given link's `LinkPrototype.BuildLink`, and then is directly compared with the given link via `==`. Because the comparison is done by value, if the given link is a pointer the comparison fails which will result in false-positive hash mismatch error. To address this issue, instead of directly comparing objects using `==` check that their `Binary` value, i.e. their densest possible encoding, is equal. Add tests that assert the issue is fixed and works with existing link representations. --- datamodel/link.go | 2 +- linking/functions.go | 6 ++-- linking/functions_test.go | 73 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 linking/functions_test.go diff --git a/datamodel/link.go b/datamodel/link.go index f52354ad..06231b3b 100644 --- a/datamodel/link.go +++ b/datamodel/link.go @@ -40,7 +40,7 @@ type Link interface { // the golang string type is used for immutability and for ease of use as a map key. // As with the String method, the returned value may not elide any parts of the hash. // - // Note that there is still no contract that the returned value be able to be parsed back into a Link value; + // Note that there is still no contract that the returned value should be parsable back into a Link value; // not even in the case of `lnk.Prototype().BuildLink(lnk.Binary()[:])`. // This is because the value returned by this method may contain data that the LinkPrototype would also restate. // (For a concrete example: if using CIDs, this method will return a binary string that includes diff --git a/linking/functions.go b/linking/functions.go index af0e72e9..6184c846 100644 --- a/linking/functions.go +++ b/linking/functions.go @@ -11,7 +11,7 @@ import ( // This file contains all the functions on LinkSystem. // These are the helpful, user-facing functions we expect folks to use "most of the time" when loading and storing data. -// Varations: +// Variations: // - Load vs Store vs ComputeLink // - Load vs LoadPlusRaw // - With or without LinkContext? @@ -141,7 +141,7 @@ func (lsys *LinkSystem) LoadRaw(lnkCtx LinkContext, lnk datamodel.Link) ([]byte, hasher.Write(buf.Bytes()) hash := hasher.Sum(nil) lnk2 := lnk.Prototype().BuildLink(hash) - if lnk2 != lnk { + if lnk2.Binary() != lnk.Binary() { return nil, ErrHashMismatch{Actual: lnk2, Expected: lnk} } // No codec to deploy; this is the raw load function. @@ -205,7 +205,7 @@ func (lsys *LinkSystem) Fill(lnkCtx LinkContext, lnk datamodel.Link, na datamode // (Then do a bit of a jig to build a link out of it -- because that's what we do the actual hash equality check on.) hash := hasher.Sum(nil) lnk2 := lnk.Prototype().BuildLink(hash) - if lnk2 != lnk { + if lnk2.Binary() != lnk.Binary() { return ErrHashMismatch{Actual: lnk2, Expected: lnk} } // If we got all the way through IO and through the hash check: diff --git a/linking/functions_test.go b/linking/functions_test.go new file mode 100644 index 00000000..543fd785 --- /dev/null +++ b/linking/functions_test.go @@ -0,0 +1,73 @@ +package linking_test + +import ( + "bytes" + "context" + "testing" + + qt "github.com/frankban/quicktest" + "github.com/ipfs/go-cid" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec/dagcbor" + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/fluent" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/storage/memstore" + "github.com/multiformats/go-multicodec" +) + +func TestLinkSystem_LoadHashMismatch(t *testing.T) { + subject := cidlink.DefaultLinkSystem() + storage := &memstore.Store{} + subject.SetReadStorage(storage) + subject.SetWriteStorage(storage) + + // Construct some test IPLD node. + wantNode := fluent.MustBuildMap(basicnode.Prototype.Map, 1, func(na fluent.MapAssembler) { + na.AssembleEntry("fish").AssignString("barreleye") + }) + + // Encode as raw value to be used for testing LoadRaw + var buf bytes.Buffer + qt.Check(t, dagcbor.Encode(wantNode, &buf), qt.IsNil) + wantNodeRaw := buf.Bytes() + + // Store the test IPLD node and get link back. + lctx := ipld.LinkContext{Ctx: context.TODO()} + gotLink, err := subject.Store(lctx, cidlink.LinkPrototype{ + Prefix: cid.Prefix{ + Version: 1, + Codec: uint64(multicodec.DagCbor), + MhType: uint64(multicodec.Sha2_256), + MhLength: -1, + }, + }, wantNode) + qt.Check(t, err, qt.IsNil) + gotCidlink := gotLink.(cidlink.Link) + + // Assert all load variations return expected values for different link representations. + for _, test := range []struct { + name string + link datamodel.Link + }{ + {"datamodel.Link", gotLink}, + {"cidlink.Link", gotCidlink}, + {"&cidlink.Link", &gotCidlink}, + } { + t.Run(test.name, func(t *testing.T) { + gotNode, err := subject.Load(lctx, test.link, basicnode.Prototype.Any) + qt.Check(t, err, qt.IsNil) + qt.Check(t, ipld.DeepEqual(wantNode, gotNode), qt.IsTrue) + + gotNodeRaw, err := subject.LoadRaw(lctx, test.link) + qt.Check(t, err, qt.IsNil) + qt.Check(t, bytes.Equal(wantNodeRaw, gotNodeRaw), qt.IsTrue) + + gotNode, gotNodeRaw, err = subject.LoadPlusRaw(lctx, test.link, basicnode.Prototype.Any) + qt.Check(t, err, qt.IsNil) + qt.Check(t, ipld.DeepEqual(wantNode, gotNode), qt.IsTrue) + qt.Check(t, bytes.Equal(wantNodeRaw, gotNodeRaw), qt.IsTrue) + }) + } +}