Skip to content

Commit

Permalink
fix: dfs ordering (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
Alan Shaw authored Sep 19, 2023
1 parent b100175 commit da3eb46
Show file tree
Hide file tree
Showing 4 changed files with 234 additions and 59 deletions.
67 changes: 9 additions & 58 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ import debug from 'debug'
import { CID } from 'multiformats/cid'
import * as dagPB from '@ipld/dag-pb'
import * as Block from 'multiformats/block'
import * as raw from 'multiformats/codecs/raw'
import { UnixFS } from 'ipfs-unixfs'
import { exporter, walkPath } from 'ipfs-unixfs-exporter'
import { parallelMap, transform } from 'streaming-iterables'
import { Decoders, Hashers } from './defaults.js'
import { identity } from 'multiformats/hashes/identity'
import { depthFirst, breadthFirst } from './traversal.js'

/**
* @typedef {{ unixfs?: UnixFS }} LinkFilterContext
Expand Down Expand Up @@ -107,10 +107,16 @@ export class Dagula {
// createUnsafe here.
const block = await Block.create({ bytes, cid, codec: decoder, hasher })
yield block
nextSelectors.push(...search(getLinks(block, selector)))
nextSelectors.push(...getLinks(block, selector))
}
log('%d CIDs in links', nextSelectors.length)
selectors = nextSelectors
// reduce the next selectors in the links to the ones that should be
// considered for the given DAG traversal method. e.g. if using DFS and
// next selectors has 1 raw block, and 2 non-raw blocks, then the DFS
// search will reduce the next selectors down to just the first 2 items,
// since the second item (the non-raw block) may have links that need to
// be traversed before the others.
selectors = search(nextSelectors)
}
}

Expand Down Expand Up @@ -438,58 +444,3 @@ function getUnixfsHamtPadLength (fanout) {
if (!fanout) throw new Error('missing fanout')
return (Number(fanout) - 1).toString(16).length
}

/**
 * Create a depth-first search function.
 * Call it with the latest links, it returns the link(s) to follow next.
 * Maintains a queue of links it has seen but not offered up yet.
 *
 * In depth first, we have to resolve links one at a time; we have to
 * find out if there are child links to follow before trying siblings.
 *
 * The exception to this rule is when the links are IPLD "raw" and we know
 * upfront they have no links to follow. In this case we can return multiple:
 * every raw item at the front of the queue, plus the first non-raw item that
 * follows them. Nothing queued behind a non-raw item is returned, because
 * that item's children have to be visited first.
 *
 * e.g. (x, y, z and z1 are not raw)
 *
 *    o
 *    ├── x
 *    │   ├── x1 (raw)
 *    │   └── x2 (raw)
 *    ├── y
 *    └── z
 *        └── z1
 *
 *    [x, y, z] => [x]         (queue: [y, z])
 *    [x1, x2]  => [x1, x2, y] (queue: [z])
 *    []        => [z]         (queue: [])
 *    [z1]      => [z1]        (queue: [])
 *
 * @returns {(selectors?: GraphSelector[]) => GraphSelector[]} a stateful
 * function; each call prepends `selectors` to the queue and returns the items
 * to fetch next.
 */
export function depthFirst () {
  /** @type {GraphSelector[]} */
  let queue = []

  /** @param {GraphSelector[]} selectors */
  return (selectors = []) => {
    // newest links go to the front: they are children of what was just
    // returned and must be visited before older siblings.
    queue = selectors.concat(queue)
    const next = []
    for (const selector of queue) {
      next.push(selector)
      // if this item is not raw, do not return any more items after it, since
      // it may have links we need to descend into and return before anything
      // else that is already queued.
      if (selector.cid.code !== raw.code) {
        break
      }
    }
    queue = queue.slice(next.length)
    return next
  }
}

/**
 * Create a breadth-first search function.
 *
 * Breadth first needs no bookkeeping: the returned function hands back the
 * very same list of links it was given, untouched.
 */
export function breadthFirst () {
  /** @param {GraphSelector[]} selectors */
  return (selectors) => {
    return selectors
  }
}
2 changes: 1 addition & 1 deletion prefix.test.js β†’ test/prefix.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import test from 'ava'
import * as dagCbor from '@ipld/dag-cbor'
import { sha256 } from 'multiformats/hashes/sha2'
import * as Block from 'multiformats/block'
import * as Prefix from './prefix.js'
import * as Prefix from '../prefix.js'

test('should round trip a prefix', async t => {
const { cid } = await Block.encode({ value: { some: 'data' }, codec: dagCbor, hasher: sha256 })
Expand Down
151 changes: 151 additions & 0 deletions test/traversal.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import test from 'ava'
import * as Link from 'multiformats/link'
import { breadthFirst, depthFirst } from '../traversal.js'

/** @typedef {{ cid: import('multiformats').UnknownLink, links: BlockFixture[] }} BlockFixture */

/*
DAG looks like:
- bafybeiadwbtmvpjp5z5ogb4a6nubsca2edpgwjpu2nc4p6voxpizqr7bxm
  - bafybeidy4bxkuarnyf4o5ue467axle6tukqbpdulorrbneycgsihqgne6u
    - bafkreihbvksijmidg6imolddzkyse2azgewirljymybzsqz6y53bozhsuy
    - bafkreifoyyvt5q4vkdhmrygewnbq7rqbkrbtkbxkhqzgt3m6ockew2xat4
  - bafybeiekqr2ksbjoyssjfpfhw5zuahffvswgg3dqzwamcivrd7kvnw7kgi
    - bafkreiekffiubbdy33otvzb2txuw66xvhsyimb5emditnj74hvwnogpi3u
    - bafkreihonbrwz4skbrdse7g2zewm6scxspeziyyafpmg5s7kyiedlx25va
  - bafkreia7r7kgduaobnmrzebcxfhtshnscvhivz7qvydmzp4fmzmlkj25zi
  - bafkreiatglcrcgjn7lbo5wyvgcyymyisdh2cbrwoifrxusud5btsf3hfrq
  - bafybeic33d557xulugldf6iai5mvbotaqilxxvgnvgeuudg4fxu27n6av4
    - bafkreiguu7x4cfggoegivvvwjs7sxelc6ls32xm5mtpah5ybgmegsushqi
    - bafkreihg2s4tmyw2acaejgomhowudk7rixxp4eg6onla27xzh3sxove7j4
  - bafkreihwslfxu2bwzgmd47rhxmnxjzauxncmfdv3s4cegxgtfn3aybuhy4
*/

// Test fixture describing one small DAG by CID string, plus the order in
// which each traversal function is expected to visit its blocks.
// NOTE(review): presumably the `bafkrei…` CIDs use the raw codec (link-free
// leaves) and the `bafybei…` CIDs are dag-pb nodes — confirm; every
// `bafkrei…` entry in `blocks` below does have an empty link list.
const fixture = {
// CID string the traversal starts from.
root: 'bafybeiadwbtmvpjp5z5ogb4a6nubsca2edpgwjpu2nc4p6voxpizqr7bxm',
// Expected visit order, keyed by the traversal function's `name` (the test
// looks it up with `fixture.order[traversalFn.name]`).
order: {
depthFirst: [
'bafybeiadwbtmvpjp5z5ogb4a6nubsca2edpgwjpu2nc4p6voxpizqr7bxm',
'bafybeidy4bxkuarnyf4o5ue467axle6tukqbpdulorrbneycgsihqgne6u',
'bafkreihbvksijmidg6imolddzkyse2azgewirljymybzsqz6y53bozhsuy',
'bafkreifoyyvt5q4vkdhmrygewnbq7rqbkrbtkbxkhqzgt3m6ockew2xat4',
'bafybeiekqr2ksbjoyssjfpfhw5zuahffvswgg3dqzwamcivrd7kvnw7kgi',
'bafkreiekffiubbdy33otvzb2txuw66xvhsyimb5emditnj74hvwnogpi3u',
'bafkreihonbrwz4skbrdse7g2zewm6scxspeziyyafpmg5s7kyiedlx25va',
'bafkreia7r7kgduaobnmrzebcxfhtshnscvhivz7qvydmzp4fmzmlkj25zi',
'bafkreiatglcrcgjn7lbo5wyvgcyymyisdh2cbrwoifrxusud5btsf3hfrq',
'bafybeic33d557xulugldf6iai5mvbotaqilxxvgnvgeuudg4fxu27n6av4',
'bafkreiguu7x4cfggoegivvvwjs7sxelc6ls32xm5mtpah5ybgmegsushqi',
'bafkreihg2s4tmyw2acaejgomhowudk7rixxp4eg6onla27xzh3sxove7j4',
'bafkreihwslfxu2bwzgmd47rhxmnxjzauxncmfdv3s4cegxgtfn3aybuhy4'
],
breadthFirst: [
'bafybeiadwbtmvpjp5z5ogb4a6nubsca2edpgwjpu2nc4p6voxpizqr7bxm',
'bafybeidy4bxkuarnyf4o5ue467axle6tukqbpdulorrbneycgsihqgne6u',
'bafybeiekqr2ksbjoyssjfpfhw5zuahffvswgg3dqzwamcivrd7kvnw7kgi',
'bafkreia7r7kgduaobnmrzebcxfhtshnscvhivz7qvydmzp4fmzmlkj25zi',
'bafkreiatglcrcgjn7lbo5wyvgcyymyisdh2cbrwoifrxusud5btsf3hfrq',
'bafybeic33d557xulugldf6iai5mvbotaqilxxvgnvgeuudg4fxu27n6av4',
'bafkreihwslfxu2bwzgmd47rhxmnxjzauxncmfdv3s4cegxgtfn3aybuhy4',
'bafkreihbvksijmidg6imolddzkyse2azgewirljymybzsqz6y53bozhsuy',
'bafkreifoyyvt5q4vkdhmrygewnbq7rqbkrbtkbxkhqzgt3m6ockew2xat4',
'bafkreiekffiubbdy33otvzb2txuw66xvhsyimb5emditnj74hvwnogpi3u',
'bafkreihonbrwz4skbrdse7g2zewm6scxspeziyyafpmg5s7kyiedlx25va',
'bafkreiguu7x4cfggoegivvvwjs7sxelc6ls32xm5mtpah5ybgmegsushqi',
'bafkreihg2s4tmyw2acaejgomhowudk7rixxp4eg6onla27xzh3sxove7j4'
]
},
// Adjacency list: CID string -> CID strings of the blocks it links to, in
// link order. Blocks without links map to an empty array; the test throws if
// it visits a CID that has no entry here.
/** @type {Map<string, string[]>} */
blocks: new Map([
[
'bafybeiadwbtmvpjp5z5ogb4a6nubsca2edpgwjpu2nc4p6voxpizqr7bxm',
[
'bafybeidy4bxkuarnyf4o5ue467axle6tukqbpdulorrbneycgsihqgne6u',
'bafybeiekqr2ksbjoyssjfpfhw5zuahffvswgg3dqzwamcivrd7kvnw7kgi',
'bafkreia7r7kgduaobnmrzebcxfhtshnscvhivz7qvydmzp4fmzmlkj25zi',
'bafkreiatglcrcgjn7lbo5wyvgcyymyisdh2cbrwoifrxusud5btsf3hfrq',
'bafybeic33d557xulugldf6iai5mvbotaqilxxvgnvgeuudg4fxu27n6av4',
'bafkreihwslfxu2bwzgmd47rhxmnxjzauxncmfdv3s4cegxgtfn3aybuhy4'
]
],
[
'bafybeidy4bxkuarnyf4o5ue467axle6tukqbpdulorrbneycgsihqgne6u',
[
'bafkreihbvksijmidg6imolddzkyse2azgewirljymybzsqz6y53bozhsuy',
'bafkreifoyyvt5q4vkdhmrygewnbq7rqbkrbtkbxkhqzgt3m6ockew2xat4'
]
],
[
'bafkreihbvksijmidg6imolddzkyse2azgewirljymybzsqz6y53bozhsuy',
[]
],
[
'bafkreifoyyvt5q4vkdhmrygewnbq7rqbkrbtkbxkhqzgt3m6ockew2xat4',
[]
],
[
'bafybeiekqr2ksbjoyssjfpfhw5zuahffvswgg3dqzwamcivrd7kvnw7kgi',
[
'bafkreiekffiubbdy33otvzb2txuw66xvhsyimb5emditnj74hvwnogpi3u',
'bafkreihonbrwz4skbrdse7g2zewm6scxspeziyyafpmg5s7kyiedlx25va'
]
],
[
'bafkreiekffiubbdy33otvzb2txuw66xvhsyimb5emditnj74hvwnogpi3u',
[]
],
[
'bafkreihonbrwz4skbrdse7g2zewm6scxspeziyyafpmg5s7kyiedlx25va',
[]
],
[
'bafkreia7r7kgduaobnmrzebcxfhtshnscvhivz7qvydmzp4fmzmlkj25zi',
[]
],
[
'bafkreiatglcrcgjn7lbo5wyvgcyymyisdh2cbrwoifrxusud5btsf3hfrq',
[]
],
[
'bafybeic33d557xulugldf6iai5mvbotaqilxxvgnvgeuudg4fxu27n6av4',
[
'bafkreiguu7x4cfggoegivvvwjs7sxelc6ls32xm5mtpah5ybgmegsushqi',
'bafkreihg2s4tmyw2acaejgomhowudk7rixxp4eg6onla27xzh3sxove7j4'
]
],
[
'bafkreiguu7x4cfggoegivvvwjs7sxelc6ls32xm5mtpah5ybgmegsushqi',
[]
],
[
'bafkreihg2s4tmyw2acaejgomhowudk7rixxp4eg6onla27xzh3sxove7j4',
[]
],
[
'bafkreihwslfxu2bwzgmd47rhxmnxjzauxncmfdv3s4cegxgtfn3aybuhy4',
[]
]
])
}

// Register one test per traversal strategy. Each test walks the fixture DAG
// from its root, feeding the links discovered in every round back into the
// traversal function, and checks the visit order against the expected order
// stored under the strategy's function name.
for (const createTraversal of [depthFirst, breadthFirst]) {
  test(`should traverse ${createTraversal.name}`, async t => {
    const pickNext = createTraversal()
    const visited = []
    const root = Link.parse(fixture.root)

    let batch = [{ cid: root }]
    while (batch.length > 0) {
      // links found while visiting every item of the current batch
      const discovered = []
      for (const item of batch) {
        visited.push(item.cid.toString())
        const children = fixture.blocks.get(item.cid.toString())
        if (children == null) throw new Error(`missing block in fixture: ${item.cid}`)
        const childItems = children.map(l => ({ cid: Link.parse(l) }))
        discovered.push(...childItems)
      }
      // let the strategy decide which of the known links to visit next
      batch = pickNext(discovered)
    }

    t.deepEqual(visited, fixture.order[createTraversal.name])
  })
}
73 changes: 73 additions & 0 deletions traversal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import * as raw from 'multiformats/codecs/raw'

/** @typedef {{ cid: import('multiformats').UnknownLink }} ContentAddressedObject An object with a CID property. */

/**
 * Build a stateful depth-first scheduler.
 *
 * Feed the returned function the links of the block(s) you just fetched and
 * it answers with the link(s) to fetch next. Links it has been given but has
 * not handed back yet are remembered between calls.
 *
 * Depth first normally means resolving one link at a time: a block's children
 * have to be known before any of its siblings may be visited. IPLD "raw"
 * blocks are the exception, since we know upfront they have no links to
 * follow. So each call returns every raw item at the front of the queue plus
 * the first non-raw item after them, and nothing beyond that.
 *
 * e.g. (x, y, z and z1 are not raw)
 *
 * ```
 *    o
 *    ├── x
 *    │   ├── x1 (raw)
 *    │   └── x2 (raw)
 *    ├── y
 *    └── z
 *        └── z1
 *
 *    [x, y, z] => [x]         (queue: [y, z])
 *    [x1, x2]  => [x1, x2, y] (queue: [z])
 *    []        => [z]         (queue: [])
 *    [z1]      => [z1]        (queue: [])
 * ```
 */
export function depthFirst () {
  /**
   * Links received so far that have not been returned yet, next-to-visit
   * first.
   *
   * @template {ContentAddressedObject} T
   * @type {T[]}
   */
  let pending = []

  /**
   * @template {ContentAddressedObject} T
   * @param {T[]} links
   */
  return (links = []) => {
    // fresh links are children of what was just returned, so they jump ahead
    // of everything that is still waiting.
    pending = links.concat(pending)
    // the first non-raw item is the last one we may hand out: it may have
    // links that must be descended into before anything queued behind it.
    const firstNonRaw = pending.findIndex(item => item.cid.code !== raw.code)
    const count = firstNonRaw === -1 ? pending.length : firstNonRaw + 1
    const next = pending.slice(0, count)
    pending = pending.slice(count)
    return next
  }
}

/**
 * Create a breadth-first search function.
 *
 * Breadth first needs no bookkeeping: the returned function hands back the
 * very same list of links it was given, untouched.
 */
export function breadthFirst () {
  /**
   * @template {ContentAddressedObject} T
   * @param {T[]} links
   */
  return (links) => {
    return links
  }
}

0 comments on commit da3eb46

Please sign in to comment.