Skip to content

Commit

Permalink
feat: upload-client uploadDirectory, by default, sorts the provided f…
Browse files Browse the repository at this point in the history
…iles by file name to help the user call us in a way that is deterministic and minimizes cost (storacha#1173)

Motivation:
* storacha#1172
* "Stabilize file ordering in uploadDirectory" in
https://github.com/web3-storage/secrets/issues/28
* this stabilizes it by sorting the files param unless an option is
passed telling us not to
  • Loading branch information
gobengo authored Nov 29, 2023
1 parent f54b343 commit 8cd2555
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 6 deletions.
2 changes: 1 addition & 1 deletion packages/filecoin-api/test/events/aggregator.js
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ export const test = {
minPieceInsertedAt: new Date().toISOString(),
}
const putAggregateRes = await context.aggregateStore.put(
aggregateRecord
aggregateRecord,
)
assert.ok(putAggregateRes.ok)

Expand Down
10 changes: 7 additions & 3 deletions packages/upload-client/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import * as Store from './store.js'
import * as Upload from './upload.js'
import * as UnixFS from './unixfs.js'
import * as CAR from './car.js'
import { ShardingStream } from './sharding.js'
import { ShardingStream, defaultFileComparator } from './sharding.js'

export { Store, Upload, UnixFS, CAR }
export * from './sharding.js'
Expand Down Expand Up @@ -63,13 +63,17 @@ export async function uploadFile(conf, file, options = {}) {
* has the capability to perform the action.
*
* The issuer needs the `store/add` and `upload/add` delegated capability.
* @param {import('./types.js').FileLike[]} files File data.
* @param {import('./types.js').FileLike[]} files Files that should be in the directory.
* To ensure determinism in the IPLD encoding, files are automatically sorted by `file.name`.
* To retain the order of the files as passed in the array, set `customOrder` option to `true`.
* @param {import('./types.js').UploadDirectoryOptions} [options]
*/
export async function uploadDirectory(conf, files, options = {}) {
const { customOrder = false } = options
const entries = customOrder ? files : [...files].sort(defaultFileComparator)
return await uploadBlockStream(
conf,
UnixFS.createDirectoryEncoderStream(files, options),
UnixFS.createDirectoryEncoderStream(entries, options),
options
)
}
Expand Down
35 changes: 35 additions & 0 deletions packages/upload-client/src/sharding.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { blockEncodingLength, encode, headerEncodingLength } from './car.js'

/**
* @typedef {import('./types.js').FileLike} FileLike
*/

// https://observablehq.com/@gozala/w3up-shard-size
const SHARD_SIZE = 133_169_152

Expand Down Expand Up @@ -84,3 +88,34 @@ export class ShardingStream extends TransformStream {
})
}
}

/**
* Default comparator for FileLikes. Sorts by file name in ascending order.
*
* @param {FileLike} a
* @param {FileLike} b
* @param {(file: FileLike) => string} getComparedValue - given a file being sorted, return the value by which its order should be determined, if it is different than the file object itself (e.g. file.name)
*/
export const defaultFileComparator = (
a,
b,
getComparedValue = (file) => file.name
) => {
return ascending(a, b, getComparedValue)
}

/**
* a comparator for sorting in ascending order. Use with Sorted or Array#sort.
*
* @template T
* @param {T} a
* @param {T} b
* @param {(i: T) => any} getComparedValue - given an item being sorted, return the value by which it should be sorted, if it is different than the item
*/
function ascending(a, b, getComparedValue) {
const ask = getComparedValue(a)
const bsk = getComparedValue(b)
if (ask === bsk) return 0
else if (ask < bsk) return -1
return 1
}
5 changes: 4 additions & 1 deletion packages/upload-client/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,10 @@ export interface UploadOptions
export interface UploadDirectoryOptions
extends UploadOptions,
UnixFSDirectoryEncoderOptions,
UploadProgressTrackable {}
UploadProgressTrackable {
/** whether the directory files have already been ordered in a custom way. indicates that the upload must not use a different order than the one provided. */
customOrder?: boolean
}

export interface BlobLike {
/**
Expand Down
141 changes: 140 additions & 1 deletion packages/upload-client/test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@ import * as CAR from '@ucanto/transport/car'
import * as Signer from '@ucanto/principal/ed25519'
import * as StoreCapabilities from '@web3-storage/capabilities/store'
import * as UploadCapabilities from '@web3-storage/capabilities/upload'
import { uploadFile, uploadDirectory, uploadCAR } from '../src/index.js'
import {
uploadFile,
uploadDirectory,
uploadCAR,
defaultFileComparator,
} from '../src/index.js'
import { serviceSigner } from './fixtures.js'
import { randomBlock, randomBytes } from './helpers/random.js'
import { toCAR } from './helpers/car.js'
Expand Down Expand Up @@ -368,6 +373,140 @@ describe('uploadDirectory', () => {

assert.equal(carCIDs.length, 2)
})

it('sorts files unless options.customOrder', async () => {
const space = await Signer.generate()
const agent = await Signer.generate() // The "user" that will ask the service to accept the upload
const proofs = await Promise.all([
StoreCapabilities.add.delegate({
issuer: space,
audience: agent,
with: space.did(),
expiration: Infinity,
}),
UploadCapabilities.add.delegate({
issuer: space,
audience: agent,
with: space.did(),
expiration: Infinity,
}),
])
function createSimpleMockUploadServer() {
/**
* @type {Array<Server.ProviderInput<import('@ucanto/interface').InferInvokedCapability<import('@web3-storage/capabilities').Store['add']|import('@web3-storage/capabilities').Upload['add']>>>}
*/
const invocations = []
const service = mockService({
store: {
add: provide(StoreCapabilities.add, (invocation) => {
invocations.push(invocation)
return {
ok: {
status: 'upload',
headers: { 'x-test': 'true' },
url: 'http://localhost:9200',
with: invocation.capability.with,
link: /** @type {import('../src/types.js').CARLink} */ (
invocation.capability.nb.link
),
},
}
}),
},
upload: {
add: provide(UploadCapabilities.add, (invocation) => {
invocations.push(invocation)
const { capability } = invocation
if (!capability.nb) throw new Error('nb must be present')
return { ok: capability.nb }
}),
},
})
const server = Server.create({
id: serviceSigner,
service,
codec: CAR.inbound,
validateAuthorization,
})
const connection = Client.connect({
id: serviceSigner,
codec: CAR.outbound,
channel: server,
})
return { invocations, service, server, connection }
}

const unsortedFiles = [
new File([await randomBytes(32)], '/b.txt'),
new File([await randomBytes(32)], '/b.txt'),
new File([await randomBytes(32)], 'c.txt'),
new File([await randomBytes(32)], 'a.txt'),
]

const uploadServiceForUnordered = createSimpleMockUploadServer()
// uploading unsorted files should work because they should be sorted by `uploadDirectory`
const uploadedDirUnsorted = await uploadDirectory(
{ issuer: agent, with: space.did(), proofs, audience: serviceSigner },
unsortedFiles,
{ connection: uploadServiceForUnordered.connection }
)

const uploadServiceForOrdered = createSimpleMockUploadServer()
// uploading sorted files should also work
const uploadedDirSorted = await uploadDirectory(
{ issuer: agent, with: space.did(), proofs, audience: serviceSigner },
[...unsortedFiles].sort(defaultFileComparator),
{ connection: uploadServiceForOrdered.connection }
)

// upload/add roots should be the same.
assert.equal(
uploadedDirUnsorted.toString(),
uploadedDirSorted.toString(),
'CID of upload/add root is same regardless of whether files param is sorted or unsorted'
)

// We also need to make sure the underlying shards are the same.
const shardsForUnordered = uploadServiceForUnordered.invocations
.flatMap((i) =>
i.capability.can === 'upload/add' ? i.capability.nb.shards ?? [] : []
)
.map((cid) => cid.toString())
const shardsForOrdered = uploadServiceForOrdered.invocations
.flatMap((i) =>
i.capability.can === 'upload/add' ? i.capability.nb.shards ?? [] : []
)
.map((cid) => cid.toString())
assert.deepEqual(
shardsForUnordered,
shardsForOrdered,
'upload/add .nb.shards is identical regardless of ordering of files passed to uploadDirectory'
)

// but if options.customOrder is truthy, the caller is indicating
// they have customized the order of files, so `uploadDirectory` will not sort them
const uploadServiceForCustomOrder = createSimpleMockUploadServer()
const uploadedDirCustomOrder = await uploadDirectory(
{ issuer: agent, with: space.did(), proofs, audience: serviceSigner },
[...unsortedFiles],
{ connection: uploadServiceForCustomOrder.connection, customOrder: true }
)
const shardsForCustomOrder = uploadServiceForCustomOrder.invocations
.flatMap((i) =>
i.capability.can === 'upload/add' ? i.capability.nb.shards ?? [] : []
)
.map((cid) => cid.toString())
assert.notDeepEqual(
shardsForCustomOrder,
shardsForOrdered,
'should not produce sorted shards for customOrder files'
)
// upload/add roots will also be different
assert.notEqual(
uploadedDirCustomOrder.toString(),
shardsForOrdered.toString()
)
})
})

describe('uploadCAR', () => {
Expand Down

0 comments on commit 8cd2555

Please sign in to comment.