Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't abort block processing when encountering SkipMe #251

Merged
merged 9 commits into from
Sep 30, 2021
3 changes: 3 additions & 0 deletions traversal/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ func (prog *Progress) init() {
prog.Cfg = &Config{}
}
prog.Cfg.init()
if prog.Cfg.LinkVisitOnlyOnce {
prog.SeenLinks = make(map[datamodel.Link]struct{})
}
}

// asPathSegment figures out how to coerce a node into a PathSegment.
Expand Down
4 changes: 3 additions & 1 deletion traversal/fns.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,15 @@ type Progress struct {
Path datamodel.Path
Link datamodel.Link
}
Budget *Budget // If present, tracks "budgets" for how many more steps we're willing to take before we should halt.
Budget *Budget // If present, tracks "budgets" for how many more steps we're willing to take before we should halt.
SeenLinks map[datamodel.Link]struct{} // Set used to remember which links have been visited before, if Cfg.LinkVisitOnlyOnce is true.
}

type Config struct {
Ctx context.Context // Context carried through a traversal. Optional; use it if you need cancellation.
LinkSystem linking.LinkSystem // LinkSystem used for automatic link loading, and also any storing if mutation features (e.g. traversal.Transform) are used.
LinkTargetNodePrototypeChooser LinkTargetNodePrototypeChooser // Chooser for Node implementations to produce during automatic link traversal.
LinkVisitOnlyOnce bool // By default, we visit across links wherever we see them again, even if we've visited them before, because the reason for visiting might be different than it was before since we got to it via a different path. If set to true, track links we've seen before in Progress.SeenLinks and do not visit them again. Note that sufficiently complex selectors may require valid revisiting of some links, so setting this to true can change behavior noticably and should be done with care.
}

type Budget struct {
Expand Down
14 changes: 12 additions & 2 deletions traversal/focus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ import (

var store = storage.Memory{}
var (
leafAlpha, leafAlphaLnk = encode(basicnode.NewString("alpha"))
leafBeta, leafBetaLnk = encode(basicnode.NewString("beta"))
// baguqeeyexkjwnfy
leafAlpha, leafAlphaLnk = encode(basicnode.NewString("alpha"))
// baguqeeyeqvc7t3a
leafBeta, leafBetaLnk = encode(basicnode.NewString("beta"))
// baguqeeyezhlahvq
middleMapNode, middleMapNodeLnk = encode(fluent.MustBuildMap(basicnode.Prototype.Map, 3, func(na fluent.MapAssembler) {
na.AssembleEntry("foo").AssignBool(true)
na.AssembleEntry("bar").AssignBool(false)
Expand All @@ -33,12 +36,19 @@ var (
na.AssembleEntry("nonlink").AssignString("zoo")
})
}))
// baguqeeyehfkkfwa
middleListNode, middleListNodeLnk = encode(fluent.MustBuildList(basicnode.Prototype.List, 4, func(na fluent.ListAssembler) {
na.AssembleValue().AssignLink(leafAlphaLnk)
na.AssembleValue().AssignLink(leafAlphaLnk)
na.AssembleValue().AssignLink(leafBetaLnk)
na.AssembleValue().AssignLink(leafAlphaLnk)
}))
// note that using `rootNode` directly will have a different field ordering than
// the encoded form if you were to load `rootNodeLnk` due to dag-json field
// reordering on encode, beware the difference for traversal order between
// created, in-memory nodes and those that have passed through a codec with
// field ordering rules
// baguqeeyeie4ajfy
rootNode, rootNodeLnk = encode(fluent.MustBuildMap(basicnode.Prototype.Map, 4, func(na fluent.MapAssembler) {
na.AssembleEntry("plain").AssignString("olde string")
na.AssembleEntry("linkedString").AssignLink(leafAlphaLnk)
Expand Down
16 changes: 14 additions & 2 deletions traversal/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,18 @@ func (prog Progress) walkAdv_iterateAll(n datamodel.Node, s selector.Selector, f
progNext.Path = prog.Path.AppendSegment(ps)
if v.Kind() == datamodel.Kind_Link {
lnk, _ := v.AsLink()
if prog.Cfg.LinkVisitOnlyOnce {
if _, seen := prog.SeenLinks[lnk]; seen {
continue
}
prog.SeenLinks[lnk] = struct{}{}
}
progNext.LastBlock.Path = progNext.Path
progNext.LastBlock.Link = lnk
v, err = progNext.loadLink(v, n)
if err != nil {
if _, ok := err.(SkipMe); ok {
return nil
continue
}
return err
}
Expand Down Expand Up @@ -169,12 +175,18 @@ func (prog Progress) walkAdv_iterateSelective(n datamodel.Node, attn []datamodel
progNext.Path = prog.Path.AppendSegment(ps)
if v.Kind() == datamodel.Kind_Link {
lnk, _ := v.AsLink()
if prog.Cfg.LinkVisitOnlyOnce {
if _, seen := prog.SeenLinks[lnk]; seen {
continue
}
prog.SeenLinks[lnk] = struct{}{}
}
progNext.LastBlock.Path = progNext.Path
progNext.LastBlock.Link = lnk
v, err = progNext.loadLink(v, n)
if err != nil {
if _, ok := err.(SkipMe); ok {
return nil
continue
}
return err
}
Expand Down
168 changes: 164 additions & 4 deletions traversal/walk_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package traversal_test

import (
"fmt"
"io"
"log"
"strings"
"testing"

qt "github.com/frankban/quicktest"
Expand All @@ -9,17 +13,22 @@ import (
_ "github.com/ipld/go-ipld-prime/codec/dagjson"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/fluent"
"github.com/ipld/go-ipld-prime/linking"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
"github.com/ipld/go-ipld-prime/node/basicnode"
"github.com/ipld/go-ipld-prime/traversal"
"github.com/ipld/go-ipld-prime/traversal/selector"
"github.com/ipld/go-ipld-prime/traversal/selector/builder"
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
)

/* Remember, we've got the following fixtures in scope:
var (
leafAlpha, leafAlphaLnk = encode(basicnode.NewString("alpha"))
leafBeta, leafBetaLnk = encode(basicnode.NewString("beta"))
// baguqeeyexkjwnfy
leafAlpha, leafAlphaLnk = encode(basicnode.NewString("alpha"))
// baguqeeyeqvc7t3a
leafBeta, leafBetaLnk = encode(basicnode.NewString("beta"))
// baguqeeyezhlahvq
middleMapNode, middleMapNodeLnk = encode(fluent.MustBuildMap(basicnode.Prototype.Map, 3, func(na fluent.MapAssembler) {
na.AssembleEntry("foo").AssignBool(true)
na.AssembleEntry("bar").AssignBool(false)
Expand All @@ -28,19 +37,20 @@ var (
na.AssembleEntry("nonlink").AssignString("zoo")
})
}))
// baguqeeyehfkkfwa
middleListNode, middleListNodeLnk = encode(fluent.MustBuildList(basicnode.Prototype.List, 4, func(na fluent.ListAssembler) {
na.AssembleValue().AssignLink(leafAlphaLnk)
na.AssembleValue().AssignLink(leafAlphaLnk)
na.AssembleValue().AssignLink(leafBetaLnk)
na.AssembleValue().AssignLink(leafAlphaLnk)
}))
// baguqeeyeie4ajfy
rootNode, rootNodeLnk = encode(fluent.MustBuildMap(basicnode.Prototype.Map, 4, func(na fluent.MapAssembler) {
na.AssembleEntry("plain").AssignString("olde string")
na.AssembleEntry("linkedString").AssignLink(leafAlphaLnk)
na.AssembleEntry("linkedMap").AssignLink(middleMapNodeLnk)
na.AssembleEntry("linkedList").AssignLink(middleListNodeLnk)
}))
)
})))
*/

// covers traverse using a variety of selectors.
Expand Down Expand Up @@ -322,3 +332,153 @@ func TestWalkBudgets(t *testing.T) {
qt.Check(t, err.Error(), qt.Equals, `traversal budget exceeded: budget for links reached zero while on path "3" (link: "baguqeeyexkjwnfy")`)
})
}

func TestWalkBlockLoadOrder(t *testing.T) {
// a more nested root that we can use to test SkipMe as well
// note that in using `rootNodeLnk` here rather than `rootNode` we're using the
// dag-json round-trip version which will have different field ordering
newRootNode, newRootLink := encode(fluent.MustBuildList(basicnode.Prototype.List, 6, func(na fluent.ListAssembler) {
na.AssembleValue().AssignLink(rootNodeLnk)
na.AssembleValue().AssignLink(middleListNodeLnk)
na.AssembleValue().AssignLink(rootNodeLnk)
na.AssembleValue().AssignLink(middleListNodeLnk)
na.AssembleValue().AssignLink(rootNodeLnk)
na.AssembleValue().AssignLink(middleListNodeLnk)
}))

linkNames := make(map[datamodel.Link]string)
linkNames[newRootLink] = "newRootLink"
linkNames[rootNodeLnk] = "rootNodeLnk"
linkNames[leafAlphaLnk] = "leafAlphaLnk"
linkNames[middleMapNodeLnk] = "middleMapNodeLnk"
linkNames[leafAlphaLnk] = "leafAlphaLnk"
linkNames[middleListNodeLnk] = "middleListNodeLnk"
linkNames[leafAlphaLnk] = "leafAlphaLnk"
linkNames[leafBetaLnk] = "leafBetaLnk"
for v, n := range linkNames {
fmt.Printf("n:%v:%v\n", n, v)
}
// the links that we expect from the root node, starting _at_ the root node itself
rootNodeExpectedLinks := []datamodel.Link{
rootNodeLnk,
middleListNodeLnk,
leafAlphaLnk,
leafAlphaLnk,
leafBetaLnk,
leafAlphaLnk,
middleMapNodeLnk,
leafAlphaLnk,
leafAlphaLnk,
}
// same thing but for middleListNode
middleListNodeLinks := []datamodel.Link{
middleListNodeLnk,
leafAlphaLnk,
leafAlphaLnk,
leafBetaLnk,
leafAlphaLnk,
}
// our newRootNode is a list that contains 3 consecutive links to rootNode
expectedAllBlocks := make([]datamodel.Link, 3*(len(rootNodeExpectedLinks)+len(middleListNodeLinks)))
for i := 0; i < 3; i++ {
copy(expectedAllBlocks[i*len(rootNodeExpectedLinks)+i*len(middleListNodeLinks):], rootNodeExpectedLinks[:])
copy(expectedAllBlocks[(i+1)*len(rootNodeExpectedLinks)+i*len(middleListNodeLinks):], middleListNodeLinks[:])
}

verifySelectorLoads := func(t *testing.T,
expected []datamodel.Link,
s datamodel.Node,
linkVisitOnce bool,
readFn func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error)) {

var count int
lsys := cidlink.DefaultLinkSystem()
lsys.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
Wish(t, l.String(), ShouldEqual, expected[count].String())
log.Printf("%v (%v) %s<> %v (%v)\n", l, linkNames[l], strings.Repeat(" ", 17-len(linkNames[l])), expected[count], linkNames[expected[count]])
count++
return readFn(lc, l)
}
sel, err := selector.CompileSelector(s)
Wish(t, err, ShouldEqual, nil)
err = traversal.Progress{
Cfg: &traversal.Config{
LinkSystem: lsys,
LinkTargetNodePrototypeChooser: basicnode.Chooser,
LinkVisitOnlyOnce: linkVisitOnce,
},
}.WalkMatching(newRootNode, sel, func(prog traversal.Progress, n datamodel.Node) error {
return nil
})
Wish(t, err, ShouldEqual, nil)
Wish(t, count, ShouldEqual, len(expected))
}

t.Run("CommonSelector_MatchAllRecursively", func(t *testing.T) {
s := selectorparse.CommonSelector_MatchAllRecursively
verifySelectorLoads(t, expectedAllBlocks, s, false, (&store).OpenRead)
})

t.Run("CommonSelector_ExploreAllRecursively", func(t *testing.T) {
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedAllBlocks, s, false, (&store).OpenRead)
})

t.Run("constructed explore-all selector", func(t *testing.T) {
// used commonly in Filecoin and other places to "visit all blocks in stable order"
ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)
s := ssb.ExploreRecursive(selector.RecursionLimitNone(),
ssb.ExploreAll(ssb.ExploreRecursiveEdge())).
Node()
verifySelectorLoads(t, expectedAllBlocks, s, false, (&store).OpenRead)
})

t.Run("explore-all with duplicate load skips via SkipMe", func(t *testing.T) {
// when we use SkipMe to skip loading of already visited blocks we expect to
// see the links show up in Loads but the lack of the links inside rootNode
// and middleListNode in this list beyond the first set of loads show that
// the block is not traversed when the SkipMe is received
expectedSkipMeBlocks := []datamodel.Link{
rootNodeLnk,
middleListNodeLnk,
leafAlphaLnk,
leafAlphaLnk,
leafBetaLnk,
leafAlphaLnk,
middleMapNodeLnk,
leafAlphaLnk,
leafAlphaLnk,
middleListNodeLnk,
rootNodeLnk,
middleListNodeLnk,
rootNodeLnk,
middleListNodeLnk,
}

s := selectorparse.CommonSelector_ExploreAllRecursively
visited := make(map[datamodel.Link]bool)
verifySelectorLoads(t, expectedSkipMeBlocks, s, false, func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
log.Printf("load %v [%v]\n", l, visited[l])
if visited[l] {
return nil, traversal.SkipMe{}
}
visited[l] = true
return (&store).OpenRead(lc, l)
})
})

t.Run("explore-all with duplicate load skips via LinkVisitOnlyOnce:true", func(t *testing.T) {
// when using LinkRevisit:false to skip duplicate block loads, our loader
// doesn't even get to see the load attempts (unlike SkipMe, where the
// loader signals the skips)
expectedLinkRevisitBlocks := []datamodel.Link{
rootNodeLnk,
middleListNodeLnk,
leafAlphaLnk,
leafBetaLnk,
middleMapNodeLnk,
}
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedLinkRevisitBlocks, s, true, (&store).OpenRead)
})
}
Loading