migrate-from-w32023-to-mime #1

Closed
wants to merge 6 commits into from
54 changes: 54 additions & 0 deletions DEMO.md
@@ -0,0 +1,54 @@
Assign the fixture upload to the `$upload` variable:
```shell
upload='{"_id":"1","type":"Car","name":"Upload at 2024-01-19T04:40:04.490Z","created":"2024-01-19T04:40:04.49+00:00","updated":"2024-01-19T04:40:04.49+00:00","cid":"bafybeihtddvvufnzdcetubq5mbv2rvgjchlipf6y7esei5qzg4r7re7rju","dagSize":2949303,"pins":[{"status":"Pinned","updated":"2024-01-19T04:40:04.49+00:00","peerId":"bafzbeibhqavlasjc7dvbiopygwncnrtvjd2xmryk5laib7zyjor6kf3avm","peerName":"elastic-ipfs","region":null}],"parts":["bagbaieraclriozt34fk5ej3aa7k67es2hyq5zyc3ohivgbee4qeyyeroqb4a"],"deals":[]}'
```
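
In a real migration, records shaped like this fixture would come from the old web3.storage account rather than a pasted string. A rough sketch of listing them as NDJSON (the endpoint and exact response shape are assumptions, not part of this PR):

```js
// list-uploads-sketch.js: hedged sketch; assumes the old web3.storage API
// exposes GET /user/uploads returning a JSON array of upload records shaped
// like the fixture above (pagination ignored for brevity).
const res = await fetch('https://api.web3.storage/user/uploads', {
  headers: { Authorization: `Bearer ${process.env.WEB3_STORAGE_TOKEN}` }
})
if (!res.ok) throw new Error(`listing uploads failed: ${res.status}`)
for (const upload of await res.json()) {
  console.log(JSON.stringify(upload)) // one record per line = NDJSON
}
```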

`upload-to-file.js` fetches the `parts` of the provided upload, decodes them, and writes the result to the local filesystem:
```shell
⚡ echo "$upload" | node upload-to-file.js
piped to bafybeihtddvvufnzdcetubq5mbv2rvgjchlipf6y7esei5qzg4r7re7rju/GOPR0787.JPG
```
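
`upload-to-file.js` itself is not part of this diff. Below is a minimal sketch of the fetching half of that job, saving each part as a raw CAR instead of unpacking it into the original file; the gateway URL and output layout are assumptions:

```js
// upload-to-file sketch (illustrative; not the actual upload-to-file.js).
// Reads one upload record from stdin and saves each CAR listed in `parts`
// under a directory named after the upload's root CID.
import fs from 'node:fs'

const upload = JSON.parse(fs.readFileSync('/dev/stdin', 'utf8'))
await fs.promises.mkdir(upload.cid, { recursive: true })
for (const part of upload.parts) {
  // assumption: each part CAR is retrievable via the trustless-gateway CAR format
  const res = await fetch(`https://w3s.link/ipfs/${part}?format=car`)
  if (!res.ok) throw new Error(`failed to fetch part ${part}: ${res.status}`)
  await fs.promises.writeFile(`${upload.cid}/${part}.car`, Buffer.from(await res.arrayBuffer()))
}
console.log('saved', upload.parts.length, 'part(s) under', upload.cid)
```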

`migrate-from-w32023-to-mime` converts a stream of uploads into a MIME multipart stream containing each upload record and, when `--fetchParts` is set, its CAR parts as additional MIME parts.

```shell
⚡ echo $upload | migrate-from-w32023-to-mime --fetchParts > bengo.mime
wrote /dev/stdout

⚡ cat bengo.mime | head -n35
Content-Type: multipart/mixed; boundary=kf7cqzqdegb; type=multipart/mixed

--kf7cqzqdegb
content-disposition: attachment; filename="Upload at 2024-01-19T04:40:04.490Z.bafybeihtddvvufnzdcetubq5mbv2rvgjchlipf6y7esei5qzg4r7re7rju.ipfs"
content-type: application/vnd.web3.storage.car+json;version=2023.old.web3.storage
content-id: bagaaieradpqpmczlz6gwmwew34qjobdhf5k6qlcy4q4wbvocrnn3qhpfhymq

{
  "_id": "1",
  "type": "Car",
  "name": "Upload at 2024-01-19T04:40:04.490Z",
  "created": "2024-01-19T04:40:04.49+00:00",
  "updated": "2024-01-19T04:40:04.49+00:00",
  "cid": "bafybeihtddvvufnzdcetubq5mbv2rvgjchlipf6y7esei5qzg4r7re7rju",
  "dagSize": 2949303,
  "pins": [
    {
      "status": "Pinned",
      "updated": "2024-01-19T04:40:04.49+00:00",
      "peerId": "bafzbeibhqavlasjc7dvbiopygwncnrtvjd2xmryk5laib7zyjor6kf3avm",
      "peerName": "elastic-ipfs",
      "region": null
    }
  ],
  "parts": [
    "bagbaieraclriozt34fk5ej3aa7k67es2hyq5zyc3ohivgbee4qeyyeroqb4a"
  ],
  "deals": []
}

--kf7cqzqdegb
content-id: bafybeihtddvvufnzdcetubq5mbv2rvgjchlipf6y7esei5qzg4r7re7rju
content-type: application/vnd.ipld.car
content-transfer-encoding: BASE64
# much base64(CAR) on next line
```
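
To sanity-check the result, the `.mime` file can be split back into its parts. A rough sketch that relies on the framing shown above; a real MIME parser would be more robust:

```js
// inspect-mime-sketch.js (illustrative; not part of this PR)
// Prints the headers of each part in bengo.mime.
import fs from 'node:fs'

const mime = fs.readFileSync('bengo.mime', 'utf8').replace(/\r\n/g, '\n')
const boundary = mime.match(/boundary=([^;\s]+)/)[1]
const parts = mime
  .split(`--${boundary}`)
  .slice(1) // drop the top-level Content-Type preamble
  .filter(p => p.trim() !== '' && p.trim() !== '--') // drop the closing delimiter, if present
for (const part of parts) {
  const headerEnd = part.indexOf('\n\n', 1) // headers end at the first blank line
  const rawHeaders = headerEnd === -1 ? part : part.slice(0, headerEnd)
  console.log(rawHeaders.trim(), '\n')
}
```

Decoding a base64 part body with `Buffer.from(body, 'base64')` recovers the original CAR bytes.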
3 changes: 2 additions & 1 deletion create-unixfs.js
@@ -5,7 +5,8 @@ import * as Link from 'multiformats/link'
import { CARWriterStream } from 'carstream/writer'

const token = process.env.WEB3_STORAGE_TOKEN ?? ''
const storage = new Web3Storage({ token })
const endpoint = process.env.W3_URL ? new URL(process.env.W3_URL) : undefined
const storage = new Web3Storage({ token, endpoint })

const { readable, writable } = new TransformStream()
const writer = UnixFS.createWriter({ writable })
154 changes: 154 additions & 0 deletions migrate-from-w32023-to-mime.js
@@ -0,0 +1,154 @@
#!/usr/bin/env node
/**
 * @fileoverview command line interface with tools for migrating from https://old.web3.storage/.
 */
import fs from 'fs'
import { fileURLToPath } from 'url'
import {parseArgs} from 'node:util'
import { Readable } from 'stream'
import readNDJSONStream from 'ndjson-readablestream';
import stream from 'node:stream'
import { CID } from 'multiformats/cid'
import * as dagJson from 'multiformats/codecs/json'
import { sha256 } from 'multiformats/hashes/sha2'
const Multipart = await import('multipart-stream').then(m => m.default)
import {Base64Encode} from 'base64-stream';
import { fetchUploadParts } from './upload.js';

const isMain = (url, argv=process.argv) => fileURLToPath(url) === fs.realpathSync(argv[1])
if (isMain(import.meta.url, process.argv)) {
  main(process.argv).catch(error => console.error('error in main()', error))
}

function getContentTypeFromCid(cid) {
  const parsed = CID.parse(cid)
  switch (parsed.code) {
    case 0x0202:
      return 'application/vnd.ipld.car'
    case 0x70:
      return 'application/vnd.ipld.dag-pb'
    default:
      throw new Error(`unexpected code ${parsed.code}`)
  }
}
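
// Note (illustrative, matching the DEMO fixture): CAR part CIDs such as
// "bagbaiera…" parse to codec 0x0202 (application/vnd.ipld.car), while upload
// root CIDs such as "bafybei…" parse to codec 0x70 (application/vnd.ipld.dag-pb).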

async function main(argv) {
  const options = {
    from: {
      type: 'string',
      default: '/dev/stdin',
      help: 'where to get data from'
    },
    fromMediaType: {
      type: 'string',
      default: 'application/vnd.web3.storage.car+ndjson;version=2023.old.web3.storage',
      help: 'what kind of data to expect when sourced from options.from'
    },
    to: {
      type: 'string',
      help: 'where to write',
      default: '/dev/stdout',
    },
    fetchParts: {
      type: 'boolean',
      default: false,
      help: 'whether to fetch parts for each upload and include those in the multipart output',
    }
  }
  const args = parseArgs({
    args: argv.slice(2),
    options,
  })
  const toMediaType = args.values.fromMediaType.replace(/\+ndjson$/, '+json')
  let encoded
  switch (toMediaType) {
    case 'application/vnd.web3.storage.car+ndjson;version=2023.old.web3.storage':
      encoded = await createMultipartRelatedReadable(
        Readable.toWeb(fs.createReadStream(args.values.from)),
        {
          type: 'multipart/mixed',
          fetchParts: args.values.fetchParts,
          getPartContentType: () => args.values.fromMediaType.replace(/\+ndjson/, '+json'),
          getPartHeaders: (object) => {
            return {
              ...(object.name ? {
                'content-disposition': `attachment; filename="${object.name}.${object.cid}.ipfs"`,
              } : {})
            }
          }
        }
      )
      break;
    default:
      throw new Error(`unsupported target mediaType "${toMediaType}"`)
  }
  const to = args.values.to
  await stream.pipeline(encoded, fs.createWriteStream(to), (err) => {
    // the callback also fires on success, so only re-throw real errors
    if (err) throw err
  })
  console.warn('wrote', to)
}

/**
 * @param {ReadableStream<Uint8Array>} ndjsonUploads
 * @param {object} [options]
 * @param {string} [options.type] - content-type of whole message
 * @param {boolean} [options.fetchParts] - whether to fetch part CID for each upload
 * @param {(object) => Promise<void>} [options.forEachUpload]
 * @param {(object) => string} [options.getPartContentType] - get content type for a single part
 * @param {(object) => object} [options.getPartHeaders] - get headers for a single part
 */
async function createMultipartRelatedReadable(ndjsonUploads, options={}) {
  const { type = 'Multipart/Mixed' } = options
  const uploadsMultipart = new Multipart()
  /** @type {Promise<FetchedUploadPart[]>[]} */
  const queueToFetchUploadParts = []
  for await (const object of readNDJSONStream(ndjsonUploads)) {
    await options?.forEachUpload?.(object)
    const body = JSON.stringify(object, undefined, 2) + '\n'
    const bodyDagJsonCid = CID.create(1, dagJson.code, await sha256.digest(dagJson.encode(object)))
    const contentType = options?.getPartContentType?.(object);
    uploadsMultipart.addPart({
      headers: {
        // getPartHeaders is optional (see jsdoc above), so call it optionally too
        ...(options?.getPartHeaders?.(object) ?? {}),
        ...(contentType ? { 'content-type': contentType } : {}),
        'content-id': `${bodyDagJsonCid}`,
      },
      body,
    })

    if (options.fetchParts) {
      queueToFetchUploadParts.push(fetchUploadParts(object))
    }
  }
  while (queueToFetchUploadParts.length) {
    const fetchedUploads = await (queueToFetchUploadParts.pop())
    for (const { upload, response, url, cid } of fetchedUploads) {
      uploadsMultipart.addPart({
        headers: {
          'content-id': upload.cid,
          'content-type': getContentTypeFromCid(cid),
          'content-transfer-encoding': 'BASE64'
        },
        body: Readable.fromWeb(response.body).pipe(new Base64Encode),
      })
    }
  }
  const multipartUploads = () => Readable.from(async function * () {
    function * text (text) { yield new TextEncoder().encode(text) }
    yield * text(`Content-Type: ${type}`)
    yield * text(`; boundary=${uploadsMultipart.boundary}`)
    if (options?.type) {
      yield * text(`; type=${options.type}`)
    }
    yield * text('\n\n')
    yield * new ReadableStream({
      async start(controller) {
        uploadsMultipart.on('data', chunk => {
          controller.enqueue(chunk)
        })
        // close once the multipart stream has emitted all parts so downstream readers finish
        uploadsMultipart.on('end', () => controller.close())
      }
    })
  }())
  return multipartUploads()
}
56 changes: 56 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

10 changes: 9 additions & 1 deletion package.json
@@ -5,15 +5,23 @@
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
"test": "echo \"Error: no test specified\" && exit 1",
"migrate-from-w32023-to-mime": "./migrate-from-w32023-to-mime.js"
},
"author": "Alan Shaw",
"license": "MIT",
"dependencies": {
"@ipld/dag-json": "^10.1.7",
"@ipld/unixfs": "^2.1.2",
"@web3-storage/pail": "^0.4.0",
"base64-stream": "^1.0.0",
"carstream": "^1.1.1",
"multiformats": "^13.0.1",
"multipart-stream": "^2.0.1",
"ndjson-readablestream": "^1.1.0",
"web3.storage": "^4.5.5"
},
"bin": {
"migrate-from-w32023-to-mime": "./migrate-from-w32023-to-mime.js"
}
}