Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ordering option to getPath #19

Merged
merged 4 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { CID } from 'multiformats/cid'
import * as dagPB from '@ipld/dag-pb'
import * as Block from 'multiformats/block'
import { exporter, walkPath } from 'ipfs-unixfs-exporter'
import { transform } from 'streaming-iterables'
import { parallelMap, transform } from 'streaming-iterables'
import { Decoders, Hashers } from './defaults.js'
import { identity } from 'multiformats/hashes/identity'

Expand Down Expand Up @@ -38,9 +38,10 @@ export class Dagula {
*/
async * get (cid, options = {}) {
cid = typeof cid === 'string' ? CID.parse(cid) : cid
const order = options.order ?? 'rnd'
log('getting DAG %s', cid)
let cids = Array.isArray(cid) ? cid : [cid]
const search = options.search || breadthFirstSearch()
const search = options.search || blockLinks()
Comment on lines -43 to +44
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right — the half-formed idea here was that the search fn could be used to let a caller pass in a stateful depth-first-search equivalent of the default "just return all the links in the current block", but I didn't try to implement it to test out that idea.

...all I really needed at the time was a way to pass in a rule for filtering out some links so I could implement `carScope: 'file'` (dag-scope "entity") for a HAMT, and I got carried away.

`search` could go away in favour of `linkFilter: ([name, cid]) => boolean` or similar, and we could just have this `get` fn always do depth-first traversal. There was no strong reason for the previous default, and we can return depth-first order for `rnd` as well.


/** @type {AbortController[]} */
let aborters = []
Expand All @@ -49,7 +50,8 @@ export class Dagula {

while (cids.length > 0) {
log('fetching %d CIDs', cids.length)
const fetchBlocks = transform(cids.length, async cid => {
const parallelFn = order === 'dfs' ? parallelMap : transform
const fetchBlocks = parallelFn(cids.length, async cid => {
if (signal) {
const aborter = new AbortController()
aborters.push(aborter)
Expand Down Expand Up @@ -77,7 +79,12 @@ export class Dagula {
// createUnsafe here.
const block = await Block.create({ bytes, cid, codec: decoder, hasher })
yield block
nextCids = nextCids.concat(search(block))
const blockCids = search(block)
if (order === 'dfs') {
yield * this.get(blockCids, options)
} else {
nextCids = nextCids.concat(blockCids)
}
}
log('%d CIDs in links', nextCids.length)
cids = nextCids
Expand All @@ -94,6 +101,7 @@ export class Dagula {
* @param {string} cidPath
* @param {object} [options]
* @param {AbortSignal} [options.signal]
* @param {'dfs'|'unk'} [options.order] Specify desired block ordering. `dfs` - Depth First Search, `unk` - unknown ordering. NOTE(review): the implementation defaults to `'rnd'` (see `get()`), which is not a documented value — align the doc and the code.
* @param {'all'|'file'|'block'} [options.carScope] control how many layers of the dag are returned
* 'all': return the entire dag starting at path. (default)
* 'block': return the block identified by the path.
Expand Down Expand Up @@ -142,7 +150,7 @@ export class Dagula {
const links = getLinks(base, this.#decoders)
// fetch the entire dag rooted at the end of the provided path
if (links.length) {
yield * this.get(links, { signal: options.signal })
yield * this.get(links, { signal: options.signal, order: options.order })
}
}
// non-files, like directories, and IPLD Maps only return blocks necessary for their enumeration
Expand All @@ -152,7 +160,7 @@ export class Dagula {
if (base.unixfs.type === 'hamt-sharded-directory') {
const hamtLinks = base.node.Links?.filter(l => l.Name.length === 2).map(l => l.Hash) || []
if (hamtLinks.length) {
yield * this.get(hamtLinks, { search: hamtSearch, signal: options.signal })
yield * this.get(hamtLinks, { search: hamtSearch, signal: options.signal, order: options.order })
}
}
}
Expand Down Expand Up @@ -221,7 +229,7 @@ export class Dagula {
*
* @param {([name, cid]: [string, Link]) => boolean} linkFilter
*/
export function breadthFirstSearch (linkFilter = () => true) {
export function blockLinks (linkFilter = () => true) {
/**
* @param {import('multiformats').BlockView} block
*/
Expand All @@ -245,7 +253,7 @@ export function breadthFirstSearch (linkFilter = () => true) {
}
}

export const hamtSearch = breadthFirstSearch(([name]) => name.length === 2)
export const hamtSearch = blockLinks(([name]) => name.length === 2)

/**
* Get links as array of CIDs for a UnixFS entry.
Expand Down
165 changes: 165 additions & 0 deletions test/getPath.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,171 @@ test('should getPath on file with carScope=file', async t => {
t.deepEqual(blocks.at(3).bytes, filePart2.bytes)
})

test('should getPath on large file with carScope=file, default ordering', async t => {
  // return all blocks in path and all blocks for resolved target of path.
  // DAG shape: dirNode -> fileNode -> { fileSubNode1 -> [part1, part2], fileSubNode2 -> [part3, part4] }
  const leaf = bytes => Block.decode({ codec: raw, bytes, hasher: sha256 })
  const filePart1 = await leaf(fromString(`MORE TEST DATA ${Date.now()}`))
  const filePart2 = await leaf(fromString(`EVEN MORE TEST DATA ${Date.now()}`))
  const filePart3 = await leaf(fromString(`SO MUCH TEST DATA ${Date.now()}`))
  const filePart4 = await leaf(fromString(`TEST DATA DOING THE MOST ${Date.now()}`))

  // encode an intermediate unixfs file node linking to the given child blocks
  const fileOf = children => Block.encode({
    codec: dagPB,
    hasher: sha256,
    value: {
      Data: new UnixFSv1({ type: 'file' }).marshal(),
      Links: children.map((child, i) => ({ Name: `${i}`, Hash: child.cid }))
    }
  })
  const fileSubNode1 = await fileOf([filePart1, filePart2])
  const fileSubNode2 = await fileOf([filePart3, filePart4])
  const fileNode = await fileOf([fileSubNode1, fileSubNode2])

  const dirNode = await Block.encode({
    codec: dagPB,
    hasher: sha256,
    value: {
      Data: new UnixFSv1({ type: 'directory' }).marshal(),
      Links: [
        { Name: 'foo', Hash: fileNode.cid },
        { Name: 'other', Hash: CID.parse('QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn') }
      ]
    }
  })

  const peer = await startBitswapPeer([filePart1, filePart2, filePart3, filePart4, fileSubNode1, fileSubNode2, fileNode, dirNode])

  const libp2p = await getLibp2p()
  const dagula = await fromNetwork(libp2p, { peer: peer.libp2p.getMultiaddrs()[0] })

  const blocks = []
  for await (const entry of dagula.getPath(`${dirNode.cid}/foo`, { carScope: 'file' })) {
    blocks.push(entry)
  }
  // did not try and return block for `other`.
  // default ordering is breadth-first: level by level from the root.
  const expected = [dirNode, fileNode, fileSubNode1, fileSubNode2, filePart1, filePart2, filePart3, filePart4]
  t.is(blocks.length, expected.length)
  expected.forEach((block, i) => {
    t.deepEqual(blocks.at(i).cid, block.cid)
    t.deepEqual(blocks.at(i).bytes, block.bytes)
  })
})

test('should getPath on large file with carScope=file, dfs ordering', async t => {
  // return all blocks in path and all blocks for resolved target of path.
  // DAG shape: dirNode -> fileNode -> { fileSubNode1 -> [part1, part2], fileSubNode2 -> [part3, part4] }
  const leaf = bytes => Block.decode({ codec: raw, bytes, hasher: sha256 })
  const filePart1 = await leaf(fromString(`MORE TEST DATA ${Date.now()}`))
  const filePart2 = await leaf(fromString(`EVEN MORE TEST DATA ${Date.now()}`))
  const filePart3 = await leaf(fromString(`SO MUCH TEST DATA ${Date.now()}`))
  const filePart4 = await leaf(fromString(`TEST DATA DOING THE MOST ${Date.now()}`))

  // encode an intermediate unixfs file node linking to the given child blocks
  const fileOf = children => Block.encode({
    codec: dagPB,
    hasher: sha256,
    value: {
      Data: new UnixFSv1({ type: 'file' }).marshal(),
      Links: children.map((child, i) => ({ Name: `${i}`, Hash: child.cid }))
    }
  })
  const fileSubNode1 = await fileOf([filePart1, filePart2])
  const fileSubNode2 = await fileOf([filePart3, filePart4])
  const fileNode = await fileOf([fileSubNode1, fileSubNode2])

  const dirNode = await Block.encode({
    codec: dagPB,
    hasher: sha256,
    value: {
      Data: new UnixFSv1({ type: 'directory' }).marshal(),
      Links: [
        { Name: 'foo', Hash: fileNode.cid },
        { Name: 'other', Hash: CID.parse('QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn') }
      ]
    }
  })

  const peer = await startBitswapPeer([filePart1, filePart2, filePart3, filePart4, fileSubNode1, fileSubNode2, fileNode, dirNode])

  const libp2p = await getLibp2p()
  const dagula = await fromNetwork(libp2p, { peer: peer.libp2p.getMultiaddrs()[0] })

  const blocks = []
  for await (const entry of dagula.getPath(`${dirNode.cid}/foo`, { carScope: 'file', order: 'dfs' })) {
    blocks.push(entry)
  }
  // did not try and return block for `other`.
  // dfs ordering: each subtree is fully emitted before its sibling.
  const expected = [dirNode, fileNode, fileSubNode1, filePart1, filePart2, fileSubNode2, filePart3, filePart4]
  t.is(blocks.length, expected.length)
  expected.forEach((block, i) => {
    t.deepEqual(blocks.at(i).cid, block.cid)
    t.deepEqual(blocks.at(i).bytes, block.bytes)
  })
})
test('should getPath on file with carScope=block', async t => {
// return all blocks in path and all blocks for resolved target of path
const filePart1 = await Block.decode({ codec: raw, bytes: fromString(`MORE TEST DATA ${Date.now()}`), hasher: sha256 })
Expand Down