From 59c6279d35dbb62641e21a38756f0d2f153c28c6 Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Thu, 12 Aug 2021 13:59:58 +0100 Subject: [PATCH 1/5] fix: disallow CAR of single block with links --- packages/api/src/car.js | 72 ++++++++++++++++++------- packages/api/test/car.spec.js | 98 ++++++++++++++++++++++++++++++++++- 2 files changed, 150 insertions(+), 20 deletions(-) diff --git a/packages/api/src/car.js b/packages/api/src/car.js index 79014156be..7c4d15b3d5 100644 --- a/packages/api/src/car.js +++ b/packages/api/src/car.js @@ -139,10 +139,8 @@ export async function carPost (request, env, ctx) { const bytes = new Uint8Array(await blob.arrayBuffer()) const reader = await CarReader.fromBytes(bytes) - const chunkSize = await getBlocksSize(reader) - if (chunkSize === 0) { - throw new Error('empty CAR') - } + const stat = await carStat(reader) + await validateCar(reader, stat) // Ensure car blob.type is set; it is used by the cluster client to set the foramt=car flag on the /add call. const content = blob.slice(0, blob.size, 'application/car') @@ -188,7 +186,7 @@ export async function carPost (request, env, ctx) { try { await env.db.query(INCREMENT_USER_USED_STORAGE, { user: user._id, - amount: chunkSize + amount: stat.size }) } catch (err) { console.error(`failed to update user used storage: ${err.stack}`) @@ -263,11 +261,41 @@ export async function sizeOf (response) { } /** - * Returns the sum of all block sizes in the received CAR. Throws if any block + * @param {CarReader} reader + * @param {CarStat} stat + */ +async function validateCar (reader, stat) { + if (stat.blocks === 0) { + throw new Error('empty CAR') + } + + const roots = await reader.getRoots() + if (roots.length === 0) { + throw new Error('missing roots') + } + if (roots.length > 1) { + throw new Error('too many roots') + } + + const rootBlock = await getBlock(reader, roots[0]) + const numLinks = Array.from(rootBlock.links()).length + + // If the root block has links, then we should have at least 2 blocks in the CAR + if (numLinks > 0 && stat.blocks < 2) { + throw new Error('CAR must contain at least one non-root block') + } +} + +/** + * Returns the sum of all block sizes and total blocks. Throws if any block * is bigger than MAX_BLOCK_SIZE (1MiB). + * + * @typedef {{ size: number, blocks: number }} CarStat * @param {CarReader} reader + * @returns {Promise} */ -async function getBlocksSize (reader) { +async function carStat (reader) { + let blocks = 0 let size = 0 for await (const block of reader.blocks()) { const blockSize = block.bytes.byteLength @@ -275,8 +303,24 @@ async function getBlocksSize (reader) { throw new Error(`block too big: ${blockSize} > ${MAX_BLOCK_SIZE}`) } size += blockSize + blocks++ } - return size + return { size, blocks } +} + +const decoders = [pb, raw, cbor] + +/** + * @param {CarReader} reader + * @param {import('multiformats').CID} cid + */ +async function getBlock (reader, cid) { + const rawBlock = await reader.get(cid) + if (!rawBlock) throw new Error(`missing block for ${cid}`) + const { bytes } = rawBlock + const decoder = decoders.find(d => d.code === cid.code) + if (!decoder) throw new Error(`missing decoder for ${cid.code}`) + return new Block({ cid, bytes, value: decoder.decode(bytes) }) } /** @@ -284,20 +328,10 @@ async function getBlocksSize (reader) { * @param {CarReader} reader */ async function getDagSize (reader) { - const decoders = [pb, raw, cbor] const [rootCid] = await reader.getRoots() - const getBlock = async cid => { - const rawBlock = await reader.get(cid) - if (!rawBlock) throw new Error(`missing block for ${cid}`) - const { bytes } = rawBlock - const decoder = decoders.find(d => d.code === cid.code) - if (!decoder) throw new Error(`missing decoder for ${cid.code}`) - return new Block({ cid, bytes, value: decoder.decode(bytes) }) - } - const getSize = async cid => { - const block = await getBlock(cid) + const block = await getBlock(reader, cid) let size = block.bytes.byteLength for (const [, cid] of block.links()) { size += await getSize(cid) diff --git a/packages/api/test/car.spec.js b/packages/api/test/car.spec.js index ee99a66dd7..6c6f143b3b 100644 --- a/packages/api/test/car.spec.js +++ b/packages/api/test/car.spec.js @@ -100,6 +100,102 @@ describe('POST /car', () => { assert.strictEqual(res.ok, false) const { message } = await res.json() - assert.ok(message.includes('empty CAR')) + assert.strictEqual(message, 'empty CAR') + }) + + it('should throw for CAR with no roots', async () => { + const token = await getTestJWT() + + const bytes = pb.encode({ Data: new Uint8Array(), Links: [] }) + const hash = await sha256.digest(bytes) + const cid = CID.create(1, pb.code, hash) + + const { writer, out } = CarWriter.create([]) + writer.put({ cid, bytes }) + writer.close() + + const carBytes = [] + for await (const chunk of out) { + carBytes.push(chunk) + } + + const res = await fetch(new URL('car', endpoint), { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/car' + }, + body: new Blob(carBytes) + }) + + assert.strictEqual(res.ok, false) + const { message } = await res.json() + assert.strictEqual(message, 'missing roots') + }) + + it('should throw for CAR with multiple roots', async () => { + const token = await getTestJWT() + + const bytes = pb.encode({ Data: new Uint8Array(), Links: [] }) + const hash = await sha256.digest(bytes) + const cid = CID.create(1, pb.code, hash) + + const { writer, out } = CarWriter.create([ + cid, + CID.parse('bafybeibqmrg5e5bwhx2ny4kfcjx2mm3ohh2cd4i54wlygquwx7zbgwqs4e') + ]) + writer.put({ cid, bytes }) + writer.close() + + const carBytes = [] + for await (const chunk of out) { + carBytes.push(chunk) + } + + const res = await fetch(new URL('car', endpoint), { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/car' + }, + body: new Blob(carBytes) + }) + + assert.strictEqual(res.ok, false) + const { message } = await res.json() + assert.strictEqual(message, 'too many roots') + }) + + it('should throw for CAR with one root block that has links', async () => { + const token = await getTestJWT() + + const bytes = pb.encode({ + Data: new Uint8Array(), + Links: [{ Hash: CID.parse('bafybeibqmrg5e5bwhx2ny4kfcjx2mm3ohh2cd4i54wlygquwx7zbgwqs4e') }] + }) + const hash = await sha256.digest(bytes) + const cid = CID.create(1, pb.code, hash) + + const { writer, out } = CarWriter.create(cid) + writer.put({ cid, bytes }) + writer.close() + + const carBytes = [] + for await (const chunk of out) { + carBytes.push(chunk) + } + + const res = await fetch(new URL('car', endpoint), { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/car' + }, + body: new Blob(carBytes) + }) + + assert.strictEqual(res.ok, false) + const { message } = await res.json() + assert.strictEqual(message, 'CAR must contain at least one non-root block') }) }) From c16692ca7ccac55fb1dd07da5be5c1ea41a246f3 Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Thu, 12 Aug 2021 15:29:01 +0100 Subject: [PATCH 2/5] refactor: review feedback --- packages/api/src/car.js | 66 ++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/packages/api/src/car.js b/packages/api/src/car.js index 7c4d15b3d5..1be50b295e 100644 --- a/packages/api/src/car.js +++ b/packages/api/src/car.js @@ -1,6 +1,6 @@ /* eslint-env serviceworker */ import { gql } from '@web3-storage/db' -import { CarReader } from '@ipld/car' +import { CarReader, CarBlockIterator } from '@ipld/car' import { Block } from 'multiformats/block' import * as raw from 'multiformats/codecs/raw' import * as cbor from '@ipld/dag-cbor' @@ -10,6 +10,8 @@ import { GATEWAY, LOCAL_ADD_THRESHOLD, DAG_SIZE_CALC_LIMIT, MAX_BLOCK_SIZE } fro import { JSONResponse } from './utils/json-response.js' import { toPinStatusEnum } from './utils/pin.js' +const decoders = [pb, raw, cbor] + const CREATE_UPLOAD = gql` mutation CreateUpload($data: CreateUploadInput!) { createUpload(data: $data) { @@ -137,10 +139,9 @@ export async function carPost (request, env, ctx) { const blob = await request.blob() const bytes = new Uint8Array(await blob.arrayBuffer()) - const reader = await CarReader.fromBytes(bytes) + const stat = await carStat(bytes) - const stat = await carStat(reader) - await validateCar(reader, stat) + await validateCar(bytes, stat) // Ensure car blob.type is set; it is used by the cluster client to set the foramt=car flag on the /add call. const content = blob.slice(0, blob.size, 'application/car') @@ -198,7 +199,7 @@ export async function carPost (request, env, ctx) { tasks.push(async () => { let dagSize try { - dagSize = await getDagSize(reader) + dagSize = await getDagSize(bytes) } catch (err) { console.error(`could not determine DAG size: ${err.stack}`) return @@ -261,28 +262,32 @@ export async function sizeOf (response) { } /** - * @param {CarReader} reader + * @param {Uint8Array} carBytes * @param {CarStat} stat */ -async function validateCar (reader, stat) { - if (stat.blocks === 0) { - throw new Error('empty CAR') - } - - const roots = await reader.getRoots() - if (roots.length === 0) { +async function validateCar (carBytes, stat) { + if (stat.roots.length === 0) { throw new Error('missing roots') } - if (roots.length > 1) { + if (stat.roots.length > 1) { throw new Error('too many roots') } - - const rootBlock = await getBlock(reader, roots[0]) - const numLinks = Array.from(rootBlock.links()).length - - // If the root block has links, then we should have at least 2 blocks in the CAR - if (numLinks > 0 && stat.blocks < 2) { - throw new Error('CAR must contain at least one non-root block') + if (stat.blocks === 0) { + throw new Error('empty CAR') + } + if (stat.blocks === 1) { + const rootCid = stat.roots[0] + const canDecode = decoders.some(d => d.code === rootCid.code) + // if we can't decode, we can't check this... + if (canDecode) { + const reader = await CarReader.fromBytes(carBytes) + const rootBlock = await getBlock(reader, rootCid) + const numLinks = Array.from(rootBlock.links()).length + // if the root block has links, then we should have at least 2 blocks in the CAR + if (numLinks > 0 && stat.blocks < 2) { + throw new Error('CAR must contain at least one non-root block') + } + } } } @@ -290,14 +295,15 @@ async function validateCar (reader, stat) { * Returns the sum of all block sizes and total blocks. Throws if any block * is bigger than MAX_BLOCK_SIZE (1MiB). * - * @typedef {{ size: number, blocks: number }} CarStat - * @param {CarReader} reader + * @typedef {{ roots: import('multiformats').CID[], size: number, blocks: number }} CarStat + * @param {Uint8Array} carBytes * @returns {Promise} */ -async function carStat (reader) { +async function carStat (carBytes) { + const blocksIterator = await CarBlockIterator.fromBytes(carBytes) let blocks = 0 let size = 0 - for await (const block of reader.blocks()) { + for await (const block of blocksIterator) { const blockSize = block.bytes.byteLength if (blockSize > MAX_BLOCK_SIZE) { throw new Error(`block too big: ${blockSize} > ${MAX_BLOCK_SIZE}`) @@ -305,11 +311,10 @@ async function carStat (reader) { size += blockSize blocks++ } - return { size, blocks } + const roots = await blocksIterator.getRoots() + return { roots, size, blocks } } -const decoders = [pb, raw, cbor] - /** * @param {CarReader} reader * @param {import('multiformats').CID} cid @@ -325,9 +330,10 @@ async function getBlock (reader, cid) { /** * Returns the DAG size of the CAR but only if the graph is complete. - * @param {CarReader} reader + * @param {Uint8Array} carBytes */ -async function getDagSize (reader) { +async function getDagSize (carBytes) { + const reader = await CarReader.fromBytes(carBytes) const [rootCid] = await reader.getRoots() const getSize = async cid => { From 538a69e2d81125900872a14e50ef958decc1c068 Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Thu, 12 Aug 2021 15:31:11 +0100 Subject: [PATCH 3/5] fix: remove unnecessary condition --- packages/api/src/car.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/car.js b/packages/api/src/car.js index 1be50b295e..4143100727 100644 --- a/packages/api/src/car.js +++ b/packages/api/src/car.js @@ -284,7 +284,7 @@ async function validateCar (carBytes, stat) { const rootBlock = await getBlock(reader, rootCid) const numLinks = Array.from(rootBlock.links()).length // if the root block has links, then we should have at least 2 blocks in the CAR - if (numLinks > 0 && stat.blocks < 2) { + if (numLinks > 0) { throw new Error('CAR must contain at least one non-root block') } } From b570f068c9e18bf135bbb3a2b4facd3b5084fa5b Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Thu, 12 Aug 2021 15:56:41 +0100 Subject: [PATCH 4/5] refactor: pull validation into stat --- packages/api/src/car.js | 99 ++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/packages/api/src/car.js b/packages/api/src/car.js index 4143100727..3ea073dd81 100644 --- a/packages/api/src/car.js +++ b/packages/api/src/car.js @@ -141,8 +141,6 @@ export async function carPost (request, env, ctx) { const bytes = new Uint8Array(await blob.arrayBuffer()) const stat = await carStat(bytes) - await validateCar(bytes, stat) - // Ensure car blob.type is set; it is used by the cluster client to set the foramt=car flag on the /add call. const content = blob.slice(0, blob.size, 'application/car') @@ -262,45 +260,30 @@ export async function sizeOf (response) { } /** - * @param {Uint8Array} carBytes - * @param {CarStat} stat - */ -async function validateCar (carBytes, stat) { - if (stat.roots.length === 0) { - throw new Error('missing roots') - } - if (stat.roots.length > 1) { - throw new Error('too many roots') - } - if (stat.blocks === 0) { - throw new Error('empty CAR') - } - if (stat.blocks === 1) { - const rootCid = stat.roots[0] - const canDecode = decoders.some(d => d.code === rootCid.code) - // if we can't decode, we can't check this... - if (canDecode) { - const reader = await CarReader.fromBytes(carBytes) - const rootBlock = await getBlock(reader, rootCid) - const numLinks = Array.from(rootBlock.links()).length - // if the root block has links, then we should have at least 2 blocks in the CAR - if (numLinks > 0) { - throw new Error('CAR must contain at least one non-root block') - } - } - } -} - -/** - * Returns the sum of all block sizes and total blocks. Throws if any block - * is bigger than MAX_BLOCK_SIZE (1MiB). + * Returns the sum of all block sizes and total blocks. Throws if the CAR does + * not conform to our idea of a valid CAR i.e. + * - Missing root CIDs + * - >1 root CID + * - Any block bigger than MAX_BLOCK_SIZE (1MiB) + * - 0 blocks + * - Missing root block + * - Missing non-root blocks (when root block has links) * - * @typedef {{ roots: import('multiformats').CID[], size: number, blocks: number }} CarStat + * @typedef {{ size: number, blocks: number }} CarStat * @param {Uint8Array} carBytes * @returns {Promise} */ async function carStat (carBytes) { const blocksIterator = await CarBlockIterator.fromBytes(carBytes) + const roots = await blocksIterator.getRoots() + if (roots.length === 0) { + throw new Error('missing roots') + } + if (roots.length > 1) { + throw new Error('too many roots') + } + const rootCid = roots[0] + let rawRootBlock let blocks = 0 let size = 0 for await (const block of blocksIterator) { @@ -308,24 +291,31 @@ async function carStat (carBytes) { if (blockSize > MAX_BLOCK_SIZE) { throw new Error(`block too big: ${blockSize} > ${MAX_BLOCK_SIZE}`) } + if (!rawRootBlock && block.cid.equals(rootCid)) { + rawRootBlock = block + } size += blockSize blocks++ } - const roots = await blocksIterator.getRoots() - return { roots, size, blocks } -} - -/** - * @param {CarReader} reader - * @param {import('multiformats').CID} cid - */ -async function getBlock (reader, cid) { - const rawBlock = await reader.get(cid) - if (!rawBlock) throw new Error(`missing block for ${cid}`) - const { bytes } = rawBlock - const decoder = decoders.find(d => d.code === cid.code) - if (!decoder) throw new Error(`missing decoder for ${cid.code}`) - return new Block({ cid, bytes, value: decoder.decode(bytes) }) + if (blocks === 0) { + throw new Error('empty CAR') + } + if (!rawRootBlock) { + throw new Error('missing root block') + } + if (blocks === 1) { + const decoder = decoders.find(d => d.code === rootCid.code) + // if we can't decode, we can't check this... + if (decoder) { + const rootBlock = new Block({ cid: rootCid, bytes: rawRootBlock.bytes, value: decoder.decode(rawRootBlock.bytes) }) + const numLinks = Array.from(rootBlock.links()).length + // if the root block has links, then we should have at least 2 blocks in the CAR + if (numLinks > 0) { + throw new Error('CAR must contain at least one non-root block') + } + } + } + return { size, blocks } } /** @@ -336,6 +326,15 @@ async function getDagSize (carBytes) { const reader = await CarReader.fromBytes(carBytes) const [rootCid] = await reader.getRoots() + const getBlock = async cid => { + const rawBlock = await reader.get(cid) + if (!rawBlock) throw new Error(`missing block for ${cid}`) + const { bytes } = rawBlock + const decoder = decoders.find(d => d.code === cid.code) + if (!decoder) throw new Error(`missing decoder for ${cid.code}`) + return new Block({ cid, bytes, value: decoder.decode(bytes) }) + } + const getSize = async cid => { const block = await getBlock(reader, cid) let size = block.bytes.byteLength From e7ee1ce65a0dd67adc5a0b144800cd7f4e3707c0 Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Thu, 12 Aug 2021 15:57:43 +0100 Subject: [PATCH 5/5] fix: getBlock not take reader --- packages/api/src/car.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/car.js b/packages/api/src/car.js index 3ea073dd81..2183c17b62 100644 --- a/packages/api/src/car.js +++ b/packages/api/src/car.js @@ -336,7 +336,7 @@ async function getDagSize (carBytes) { } const getSize = async cid => { - const block = await getBlock(reader, cid) + const block = await getBlock(cid) let size = block.bytes.byteLength for (const [, cid] of block.links()) { size += await getSize(cid)