Skip to content

Commit

Permalink
feat: add HAMT sharded directories support (#41)
Browse files Browse the repository at this point in the history
Integrates `@perma/map` to allow HAMT sharded directories to be created.

In this PR there's no automated switch to HAMT, the consumer needs to explicitly use `createShardedDirectoryWriter` to build one.
  • Loading branch information
Alan Shaw authored Mar 14, 2023
1 parent 802c7db commit fb87f9d
Show file tree
Hide file tree
Showing 9 changed files with 1,157 additions and 32 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ dist
.nyc_output
tmp
node_modules
coverage
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,17 @@
"test:web": "playwright-test test/**/*.spec.js --cov && nyc report",
"test:node": "c8 --check-coverage --branches 95 --functions 83 --lines 94 mocha test/**/*.spec.js",
"test": "npm run test:node",
"coverage": "c8 --reporter=html mocha test/test-*.js && npm_config_yes=true npx st -d coverage -p 8080",
"coverage": "c8 --reporter=html mocha test/**/*.spec.js && npm_config_yes=true npx st -d coverage -p 8080",
"typecheck": "tsc --build",
"test:convergence": "mocha test/convergence.js"
},
"dependencies": {
"multiformats": "^11.0.1",
"@ipld/dag-pb": "^4.0.0",
"@multiformats/murmur3": "^2.1.3",
"@perma/map": "^1.0.2",
"@web-std/stream": "1.0.1",
"actor": "^2.3.1",
"multiformats": "^11.0.1",
"protobufjs": "^7.1.2",
"rabin-rs": "^2.1.0"
},
Expand Down
1 change: 0 additions & 1 deletion src/directory/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ export interface View<Layout extends unknown = unknown> extends Writer<Layout> {
readonly writer: BlockWriter
readonly settings: EncoderSettings<Layout>

links(): IterableIterator<DirectoryEntryLink>
state: State<Layout>

entries(): IterableIterator<[string, EntryLink]>
Expand Down
5 changes: 5 additions & 0 deletions src/lib.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ export {
set,
remove,
} from "./directory.js"
export {
create as createShardedDirectoryWriter,
close as closeShardedDirectory,
fork as forkShardedDirectory,
} from "./sharded-directory.js"

/**
* @template [Layout=unknown]
Expand Down
319 changes: 319 additions & 0 deletions src/sharded-directory.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@

// HAMT (Hash Array Mapped Trie) sharded directory writer. Uses `@perma/map`
// to build UnixFS sharded directories; consumers opt in explicitly via
// `createShardedDirectoryWriter` (re-exported from src/lib.js).

import * as PermaMap from "@perma/map"
import * as UnixFSPermaMap from "@perma/map/unixfs"
import * as PB from "@ipld/dag-pb"
import { murmur364 } from "@multiformats/murmur3"
import { Block } from 'multiformats/block'
import * as API from "./directory/api.js"
import * as File from "./file.js"
import * as UnixFS from "./codec.js"
import { set, remove } from "./directory.js"

export * from "./directory/api.js"
export { set, remove } from "./directory.js"

// Encoder configuration is shared with the file writer.
export const configure = File.configure
export const defaults = File.defaults

/**
 * Creates a new (open) HAMT sharded directory writer backed by the given
 * block writer.
 *
 * @template [Layout=unknown]
 * @param {API.Options<Layout>} config
 * @returns {API.View<Layout>}
 */
export const create = config => {
  const { writer, settings = defaults(), metadata = {} } = config
  return new HAMTDirectoryWriter({
    closed: false,
    entries: new HashMap(),
    metadata,
    settings,
    writer,
  })
}

/**
 * Returns the given writer state unless it has already been closed, in which
 * case an error is raised instead.
 *
 * @template {API.State} Writer
 * @param {Writer} writer
 * @returns {Writer}
 */
const asWritable = writer => {
  if (writer.closed) {
    throw new Error("Can not change written HAMT directory, but you can .fork() and make changes to it")
  }
  return writer
}

/**
 * Encodes the directory HAMT as a set of `dag-pb` blocks, streams them into
 * the underlying block writer and returns the link (cid + cumulative DAG
 * byte length) of the root shard. Marks the view closed, so any further
 * mutation throws.
 *
 * @template {unknown} Layout
 * @param {{ state: API.State<Layout> }} view
 * @param {API.CloseOptions} options
 * @returns {Promise<UnixFS.DirectoryLink>}
 */
export const close = async (
  view,
  { closeWriter = false, releaseLock = false } = {}
) => {
  // Throws if the view was already closed.
  // NOTE(review): `metadata` is destructured here but never used below —
  // confirm whether it was meant to be encoded into the root shard.
  const { writer, settings, metadata } = asWritable(view.state)
  view.state.closed = true

  const { entries } = view.state
  /* c8 ignore next 3 */
  if (!(entries instanceof HashMap)) {
    throw new Error(`not a HAMT: ${entries}`)
  }

  // Freeze the mutable builder into a persistent HAMT, then stream out its
  // blocks. `iterateBlocks` yields child shards before their parent, so the
  // last block yielded is the root.
  const hamt = entries.builder.build()
  const blocks = iterateBlocks(hamt, hamt.root, settings)

  /** @type {UnixFS.BlockView<UnixFS.DirectoryShard>?} */
  let root = null
  for await (const block of blocks) {
    root = block
    // we make sure that writer has some capacity for this write. If it
    // does not we await.
    if ((writer.desiredSize || 0) <= 0) {
      await writer.ready
    }
    // once writer has some capacity we write a block, however we do not
    // await completion as we don't care when it's taken off the stream.
    writer.write(block)
  }
  /* c8 ignore next */
  if (root == null) throw new Error("no root block yielded")

  if (closeWriter) {
    await writer.close()
  } else if (releaseLock) {
    writer.releaseLock()
  }

  return {
    cid: root.cid,
    dagByteLength: UnixFS.cumulativeDagByteLength(root.bytes, root.value.entries),
  }
}

/**
 * Recursively encodes a HAMT node (and its child shards) as UnixFS directory
 * shard blocks. Blocks are yielded bottom-up: every block of a child subtree
 * is yielded before the block that links to it, and the block for `node`
 * itself is yielded last — so the final block from the top-level call is the
 * root shard.
 *
 * @template {unknown} Layout
 * @param {UnixFSPermaMap.PersistentHashMap<API.EntryLink>} hamt
 * @param {UnixFSPermaMap.BitmapIndexedNode<API.EntryLink>} node
 * @param {API.EncoderSettings<Layout>} settings
 * @returns {AsyncIterableIterator<UnixFS.BlockView<UnixFS.DirectoryShard>>}
 */
const iterateBlocks = async function* (hamt, node, settings) {
  /** @type {UnixFS.DirectoryEntryLink[]} */
  const entries = []
  for (const ent of UnixFSPermaMap.iterate(node)) {
    if ('key' in ent) {
      // Leaf entry: an actual directory member. Its name is the hash prefix
      // followed by the original entry name.
      entries.push(/** @type {UnixFS.DirectoryEntryLink} */ ({
        name: `${ent.prefix ?? ''}${ent.key ?? ''}`,
        dagByteLength: ent.value.dagByteLength,
        cid: ent.value.cid,
      }))
    } else {
      // Child shard: recurse, re-yielding each of its blocks, and remember
      // the last one — that is the child's root shard, which we link to.
      /** @type {UnixFS.BlockView<UnixFS.DirectoryShard>?} */
      let root = null
      for await (const block of iterateBlocks(hamt, ent.node, settings)) {
        yield block
        root = block
      }
      /* c8 ignore next */
      if (root == null) throw new Error("no root block yielded")

      entries.push(/** @type {UnixFS.ShardedDirectoryLink} */ ({
        name: ent.prefix,
        dagByteLength: UnixFS.cumulativeDagByteLength(root.bytes, root.value.entries),
        cid: root.cid
      }))
    }
  }

  // Encode this node's own shard, carrying the bitmap, fanout table size and
  // the hash function code (murmur3-x64-64) required by the UnixFS spec.
  const shard = UnixFS.createDirectoryShard(
    entries,
    UnixFSPermaMap.bitField(node),
    UnixFSPermaMap.tableSize(hamt),
    murmur364.code
  )
  yield await encodeHAMTShardBlock(shard, settings)
}

/**
 * Encodes a single directory shard node as a `dag-pb` block, hashing and
 * linking it with the configured hasher/linker.
 *
 * @template {unknown} Layout
 * @param {UnixFS.DirectoryShard} shard
 * @param {API.EncoderSettings<Layout>} settings
 * @returns {Promise<UnixFS.BlockView<UnixFS.DirectoryShard>>}
 */
async function encodeHAMTShardBlock(shard, settings) {
  const encoded = UnixFS.encodeHAMTShard(shard)
  const digest = await settings.hasher.digest(encoded)
  // @ts-ignore Link is not CID
  return new Block({
    cid: settings.linker.createLink(PB.code, digest),
    bytes: encoded,
    value: shard,
  })
}

/**
 * Creates a new writable view seeded with the entries of the given directory,
 * allowing further changes even after the original has been closed.
 *
 * @template L1, L2
 * @param {API.View<L1>} state
 * @param {Partial<API.Options<L1|L2>>} options
 * @returns {API.View<L1|L2>}
 */
export const fork = ({ state }, options = {}) => {
  const {
    writer = state.writer,
    metadata = state.metadata,
    settings = state.settings,
  } = options
  // Snapshot the current entries into a fresh, independent builder.
  const builder = UnixFSPermaMap.from(state.entries.entries()).createBuilder()
  return new HAMTDirectoryWriter({
    writer,
    metadata,
    settings,
    entries: new HashMap(builder),
    closed: false,
  })
}

/**
 * Directory writer view backed by a HAMT. Thin object-oriented facade that
 * delegates every operation to the module-level functions, carrying the
 * shared mutable `state`.
 *
 * @template [Layout=unknown]
 * @implements {API.View<Layout>}
 */
class HAMTDirectoryWriter {
  /**
   * @param {API.State<Layout>} state
   */
  constructor(state) {
    this.state = state
  }

  get writer() {
    return this.state.writer
  }

  get settings() {
    return this.state.settings
  }

  /** Number of entries currently in the directory. */
  get size() {
    return this.state.entries.size
  }

  /**
   * Adds (or replaces) an entry linking `name` to the given file or
   * directory.
   *
   * @param {string} name
   * @param {UnixFS.FileLink | UnixFS.DirectoryLink} link
   * @param {API.WriteOptions} [options]
   */
  set(name, link, options) {
    return set(this, name, link, options)
  }

  /**
   * Removes the named entry.
   *
   * @param {string} name
   */
  remove(name) {
    return remove(this, name)
  }

  /**
   * @param {string} name
   */
  has(name) {
    return this.state.entries.has(name)
  }

  entries() {
    return this.state.entries.entries()
  }

  /**
   * @template L
   * @param {Partial<API.Options<L>>} [options]
   * @returns {API.View<Layout|L>}
   */
  fork(options) {
    return fork(this, options)
  }

  /**
   * @param {API.CloseOptions} [options]
   * @returns {Promise<UnixFS.DirectoryLink>}
   */
  close(options) {
    return close(this, options)
  }
}

/**
 * `Map`-compatible view over a persistent HAMT builder. Mutations are applied
 * to the underlying builder, from which the sharded directory is encoded on
 * close.
 *
 * @implements {Map<string, API.EntryLink>}
 */
class HashMap extends Map {
  /**
   * @param {UnixFSPermaMap.HashMapBuilder} [builder]
   */
  constructor(builder = UnixFSPermaMap.builder()) {
    super()
    /** @type {UnixFSPermaMap.HashMapBuilder} */
    this.builder = builder
  }

  get size() {
    return this.builder.size
  }

  /**
   * @param {string} key
   */
  has(key) {
    return PermaMap.has(this.builder, key)
  }

  /**
   * @param {string} key
   */
  get(key) {
    return PermaMap.get(this.builder, key)
  }

  /**
   * @param {string} key
   * @param {API.EntryLink} value
   */
  set(key, value) {
    this.builder.set(key, value)
    return this
  }

  /**
   * @param {string} key
   */
  delete(key) {
    const before = this.builder.root
    this.builder.delete(key)
    // Deleting a missing key leaves the root node unchanged, so comparing
    // identities tells us whether anything was actually removed.
    return this.builder.root !== before
  }

  clear() {
    this.builder = UnixFSPermaMap.builder()
  }

  /**
   * @param {(value: API.EntryLink, key: string, map: Map<string, API.EntryLink>) => void} callbackfn
   * @param {any} [thisArg]
   */
  forEach(callbackfn, thisArg = this) {
    for (const [name, link] of this.builder.root.entries()) {
      callbackfn.call(thisArg, link, name, this)
    }
  }

  entries() {
    return this.builder.root.entries()
  }

  keys() {
    return this.builder.root.keys()
  }

  values() {
    return this.builder.root.values()
  }

  [Symbol.iterator]() {
    return this.builder.root.entries()
  }
}
8 changes: 8 additions & 0 deletions src/unixfs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type {
Link as IPLDLink,
Version as LinkVersion,
Block as IPLDBlock,
BlockView as IPLDBlockView
} from "multiformats"
import { Data, type IData } from "../gen/unixfs.js"
export type { MultihashHasher, MultibaseEncoder, MultihashDigest, BlockEncoder }
Expand Down Expand Up @@ -401,3 +402,10 @@ export interface Block<
A extends number = number,
V extends LinkVersion = LinkVersion
> extends IPLDBlock<T, C, A, V> {}

export interface BlockView<
T = unknown,
C extends number = number,
A extends number = number,
V extends LinkVersion = LinkVersion
> extends IPLDBlockView<T, C, A, V> {}
Loading

0 comments on commit fb87f9d

Please sign in to comment.