From c27f637b0d5aef52a06242da3279a7778bdbffac Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 27 Feb 2024 09:02:56 +0000 Subject: [PATCH 1/6] fix: use unixfs exporter to traverse DAGs Reuse the existing exporter `walkPath` method to traverse a DAG as it is clever enough to not load unecessary blocks which is important when traversing HAMTs. --- .../src/verified-fetch-unixfs-dir.spec.ts | 3 +- packages/unixfs/src/commands/utils/resolve.ts | 61 ++++++------------- packages/unixfs/test/cat.spec.ts | 38 ++++++++++++ 3 files changed, 56 insertions(+), 46 deletions(-) diff --git a/packages/interop/src/verified-fetch-unixfs-dir.spec.ts b/packages/interop/src/verified-fetch-unixfs-dir.spec.ts index b88b3b53..2f2db818 100644 --- a/packages/interop/src/verified-fetch-unixfs-dir.spec.ts +++ b/packages/interop/src/verified-fetch-unixfs-dir.spec.ts @@ -75,8 +75,7 @@ describe('@helia/verified-fetch - unixfs directory', () => { }) }) - // TODO: find a smaller car file so the test doesn't timeout locally or flake on CI - describe.skip('HAMT-sharded directory', () => { + describe('HAMT-sharded directory', () => { before(async () => { // from https://github.com/ipfs/gateway-conformance/blob/193833b91f2e9b17daf45c84afaeeae61d9d7c7e/fixtures/trustless_gateway_car/single-layer-hamt-with-multi-block-files.car await loadFixtureDataCar(controller, 'bafybeidbclfqleg2uojchspzd4bob56dqetqjsj27gy2cq3klkkgxtpn4i-single-layer-hamt-with-multi-block-files.car') diff --git a/packages/unixfs/src/commands/utils/resolve.ts b/packages/unixfs/src/commands/utils/resolve.ts index 43c5defe..373c7cc4 100644 --- a/packages/unixfs/src/commands/utils/resolve.ts +++ b/packages/unixfs/src/commands/utils/resolve.ts @@ -1,6 +1,6 @@ import { logger } from '@libp2p/logger' -import { exporter } from 'ipfs-unixfs-exporter' -import { DoesNotExistError, InvalidParametersError } from '../../errors.js' +import { walkPath } from 'ipfs-unixfs-exporter' +import { DoesNotExistError } from '../../errors.js' import { addLink } from './add-link.js' import { cidToDirectory } from './cid-to-directory.js' import { cidToPBLink } from './cid-to-pblink.js' @@ -37,57 +37,30 @@ export async function resolve (cid: CID, path: string | undefined, blockstore: B return { cid } } - log('resolve "%s" under %c', path, cid) - - const parts = path.split('/').filter(Boolean) - const segments: Segment[] = [{ - name: '', - cid, - size: 0n - }] - - for (let i = 0; i < parts.length; i++) { - const part = parts[i] - const result = await exporter(cid, blockstore, options) - - log('resolving "%s"', part, result) - - if (result.type === 'file') { - if (i < parts.length - 1) { - throw new InvalidParametersError('Path was invalid') - } - - cid = result.cid - } else if (result.type === 'directory') { - let dirCid: CID | undefined - - for await (const entry of result.content()) { - if (entry.name === part) { - dirCid = entry.cid - break - } - } - - if (dirCid == null) { - throw new DoesNotExistError('Could not find path in directory') - } - - cid = dirCid + const p = `/ipfs/${cid}${path == null ? '' : `/${path}`}` + const segments: Segment[] = [] + try { + for await (const segment of walkPath(p, blockstore, options)) { segments.push({ - name: part, - cid, - size: result.size + name: segment.name === segment.cid.toString() ? '' : segment.name, + cid: segment.cid, + size: segment.size }) - } else { - throw new InvalidParametersError('Could not resolve path') } + } catch (err: any) { + // TODO: just allow `walkPath` to fail, this will change the error code + if (err.code === 'ERR_NOT_FOUND') { + throw new DoesNotExistError('Could not find path in directory') + } + + throw err } log('resolved %s to %c', path, cid) return { - cid, + cid: segments[segments.length - 1].cid, path, segments } diff --git a/packages/unixfs/test/cat.spec.ts b/packages/unixfs/test/cat.spec.ts index 1f8740d4..140f78a6 100644 --- a/packages/unixfs/test/cat.spec.ts +++ b/packages/unixfs/test/cat.spec.ts @@ -9,6 +9,7 @@ import { createShardedDirectory } from './fixtures/create-sharded-directory.js' import { smallFile } from './fixtures/files.js' import type { Blockstore } from 'interface-blockstore' import type { CID } from 'multiformats/cid' +import all from 'it-all' describe('cat', () => { let blockstore: Blockstore @@ -92,4 +93,41 @@ describe('cat', () => { expect(bytes).to.deep.equal(smallFile) }) + + it('should only load blocks necessary to traverse a HAMT', async () => { + const [, scriptFile, styleFile, imageFile, dir] = await all(fs.addAll([{ + path: 'index.html', + content: Uint8Array.from([0, 1, 2]) + }, { + path: 'script.js', + content: Uint8Array.from([3, 4, 5]) + }, { + path: 'style.css', + content: Uint8Array.from([6, 7, 8]) + }, { + path: 'image.png', + content: Uint8Array.from([9, 0, 1]) + }], { + shardSplitThresholdBytes: 1, + wrapWithDirectory: true + })) + + const dirStat = await fs.stat(dir.cid) + expect(dirStat.unixfs?.type).to.equal('hamt-sharded-directory') + + // remove all blocks that aren't the index file + await drain(blockstore.deleteMany([ + scriptFile.cid, + styleFile.cid, + imageFile.cid + ])) + + // should be able to cat the index file without loading the other files + // in the shard + const bytes = await toBuffer(fs.cat(dir.cid, { + path: 'index.html' + })) + + expect(bytes).to.equalBytes(Uint8Array.from([0, 1, 2])) + }) }) From d697d8a38c0971324802d6e1296e9aa07a0dc3cf Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 27 Feb 2024 09:09:03 +0000 Subject: [PATCH 2/6] chore: expand comment --- packages/unixfs/src/commands/utils/resolve.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/unixfs/src/commands/utils/resolve.ts b/packages/unixfs/src/commands/utils/resolve.ts index 373c7cc4..9221208a 100644 --- a/packages/unixfs/src/commands/utils/resolve.ts +++ b/packages/unixfs/src/commands/utils/resolve.ts @@ -49,7 +49,9 @@ export async function resolve (cid: CID, path: string | undefined, blockstore: B }) } } catch (err: any) { - // TODO: just allow `walkPath` to fail, this will change the error code + // TODO: remove this try/catch and error code mapping - just allow + // `walkPath` to fail, this will change the error code which is a breaking + // change if (err.code === 'ERR_NOT_FOUND') { throw new DoesNotExistError('Could not find path in directory') } @@ -57,6 +59,10 @@ export async function resolve (cid: CID, path: string | undefined, blockstore: B throw err } + if (segments.length === 0) { + throw new DoesNotExistError('Could not find path in directory') + } + log('resolved %s to %c', path, cid) return { From 43d0bcae29e46cfea32dab7b331c51bbf3d466d2 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 27 Feb 2024 09:13:18 +0000 Subject: [PATCH 3/6] chore: linting --- packages/unixfs/test/cat.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/unixfs/test/cat.spec.ts b/packages/unixfs/test/cat.spec.ts index 140f78a6..e6dbe423 100644 --- a/packages/unixfs/test/cat.spec.ts +++ b/packages/unixfs/test/cat.spec.ts @@ -2,6 +2,7 @@ import { expect } from 'aegir/chai' import { MemoryBlockstore } from 'blockstore-core' +import all from 'it-all' import drain from 'it-drain' import toBuffer from 'it-to-buffer' import { unixfs, type UnixFS } from '../src/index.js' @@ -9,7 +10,6 @@ import { createShardedDirectory } from './fixtures/create-sharded-directory.js' import { smallFile } from './fixtures/files.js' import type { Blockstore } from 'interface-blockstore' import type { CID } from 'multiformats/cid' -import all from 'it-all' describe('cat', () => { let blockstore: Blockstore From fa47e087150635899b71270f83bc35a456e69018 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 27 Feb 2024 09:14:49 +0000 Subject: [PATCH 4/6] chore: update comment --- packages/unixfs/test/cat.spec.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/unixfs/test/cat.spec.ts b/packages/unixfs/test/cat.spec.ts index e6dbe423..bded215b 100644 --- a/packages/unixfs/test/cat.spec.ts +++ b/packages/unixfs/test/cat.spec.ts @@ -123,7 +123,8 @@ describe('cat', () => { ])) // should be able to cat the index file without loading the other files - // in the shard + // in the shard - the blockstore is offline so will throw if requested + // blocks are not present const bytes = await toBuffer(fs.cat(dir.cid, { path: 'index.html' })) From f992160fa4c4624a5676d20cae5730cae957083d Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 27 Feb 2024 13:49:55 +0000 Subject: [PATCH 5/6] chore: simplify --- packages/unixfs/src/commands/utils/resolve.ts | 22 ++----------------- packages/unixfs/test/rm.spec.ts | 8 +++---- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/packages/unixfs/src/commands/utils/resolve.ts b/packages/unixfs/src/commands/utils/resolve.ts index 9221208a..b776ed0f 100644 --- a/packages/unixfs/src/commands/utils/resolve.ts +++ b/packages/unixfs/src/commands/utils/resolve.ts @@ -1,5 +1,6 @@ import { logger } from '@libp2p/logger' import { walkPath } from 'ipfs-unixfs-exporter' +import all from 'it-all' import { DoesNotExistError } from '../../errors.js' import { addLink } from './add-link.js' import { cidToDirectory } from './cid-to-directory.js' @@ -38,26 +39,7 @@ export async function resolve (cid: CID, path: string | undefined, blockstore: B } const p = `/ipfs/${cid}${path == null ? '' : `/${path}`}` - const segments: Segment[] = [] - - try { - for await (const segment of walkPath(p, blockstore, options)) { - segments.push({ - name: segment.name === segment.cid.toString() ? '' : segment.name, - cid: segment.cid, - size: segment.size - }) - } - } catch (err: any) { - // TODO: remove this try/catch and error code mapping - just allow - // `walkPath` to fail, this will change the error code which is a breaking - // change - if (err.code === 'ERR_NOT_FOUND') { - throw new DoesNotExistError('Could not find path in directory') - } - - throw err - } + const segments = await all(walkPath(p, blockstore, options)) if (segments.length === 0) { throw new DoesNotExistError('Could not find path in directory') diff --git a/packages/unixfs/test/rm.spec.ts b/packages/unixfs/test/rm.spec.ts index 8b6227b5..380657d9 100644 --- a/packages/unixfs/test/rm.spec.ts +++ b/packages/unixfs/test/rm.spec.ts @@ -38,7 +38,7 @@ describe('rm', () => { await expect(fs.stat(updatedDirCid, { path })).to.eventually.be.rejected - .with.property('code', 'ERR_DOES_NOT_EXIST') + .with.property('code', 'ERR_NOT_FOUND') }) it('removes a directory', async () => { @@ -49,7 +49,7 @@ describe('rm', () => { await expect(fs.stat(updatedDirCid, { path })).to.eventually.be.rejected - .with.property('code', 'ERR_DOES_NOT_EXIST') + .with.property('code', 'ERR_NOT_FOUND') }) it('removes a sharded directory inside a normal directory', async () => { @@ -67,7 +67,7 @@ describe('rm', () => { await expect(fs.stat(updatedContainingDirCid, { path: dirName })).to.eventually.be.rejected - .with.property('code', 'ERR_DOES_NOT_EXIST') + .with.property('code', 'ERR_NOT_FOUND') }) it('removes a sharded directory inside a sharded directory', async () => { @@ -86,7 +86,7 @@ describe('rm', () => { await expect(fs.stat(updatedContainingDirCid, { path: dirName })).to.eventually.be.rejected - .with.property('code', 'ERR_DOES_NOT_EXIST') + .with.property('code', 'ERR_NOT_FOUND') expect(updatedContainingDirCid.toString()).to.equal(shardedDirCid.toString(), 'adding and removing a file from a sharded directory did not result in the original sharded CID') }) From e9edf817f4f7c6ab604076b0bd781fdbc93dcec7 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 27 Feb 2024 14:13:16 +0000 Subject: [PATCH 6/6] chore: fix deps --- packages/unixfs/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/unixfs/package.json b/packages/unixfs/package.json index a2099de7..c4ba3e4d 100644 --- a/packages/unixfs/package.json +++ b/packages/unixfs/package.json @@ -169,6 +169,7 @@ "ipfs-unixfs": "^11.1.3", "ipfs-unixfs-exporter": "^13.5.0", "ipfs-unixfs-importer": "^15.2.4", + "it-all": "^3.0.4", "it-glob": "^2.0.6", "it-last": "^3.0.4", "it-pipe": "^3.0.1", @@ -183,7 +184,6 @@ "blockstore-core": "^4.4.0", "delay": "^6.0.0", "iso-url": "^1.2.1", - "it-all": "^3.0.4", "it-drain": "^3.0.5", "it-first": "^3.0.4", "it-to-buffer": "^4.0.5",