Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: disallow CAR of single block with links #344

Merged
merged 5 commits into from
Aug 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 57 additions & 18 deletions packages/api/src/car.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* eslint-env serviceworker */
import { gql } from '@web3-storage/db'
import { CarReader } from '@ipld/car'
import { CarReader, CarBlockIterator } from '@ipld/car'
import { Block } from 'multiformats/block'
import * as raw from 'multiformats/codecs/raw'
import * as cbor from '@ipld/dag-cbor'
Expand All @@ -10,6 +10,8 @@ import { GATEWAY, LOCAL_ADD_THRESHOLD, DAG_SIZE_CALC_LIMIT, MAX_BLOCK_SIZE } fro
import { JSONResponse } from './utils/json-response.js'
import { toPinStatusEnum } from './utils/pin.js'

const decoders = [pb, raw, cbor]

const CREATE_UPLOAD = gql`
mutation CreateUpload($data: CreateUploadInput!) {
createUpload(data: $data) {
Expand Down Expand Up @@ -137,12 +139,7 @@ export async function carPost (request, env, ctx) {

const blob = await request.blob()
const bytes = new Uint8Array(await blob.arrayBuffer())
const reader = await CarReader.fromBytes(bytes)

const chunkSize = await getBlocksSize(reader)
if (chunkSize === 0) {
throw new Error('empty CAR')
}
const stat = await carStat(bytes)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CAR could be LARGE at this point. We could pass a clone of the request to stat and have it convert the request body into an AsyncIterable so that we can use CarBlockIterator.fromIterable inside stat, to avoid creating a second copy of the CAR in mem as bytes, until after we scan it to figure out if it is large or invalid or both.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's a good suggestion but please can we not perf creep any more and get this fix merged 🙏?


// Ensure car blob.type is set; it is used by the cluster client to set the format=car flag on the /add call.
const content = blob.slice(0, blob.size, 'application/car')
Expand Down Expand Up @@ -188,7 +185,7 @@ export async function carPost (request, env, ctx) {
try {
await env.db.query(INCREMENT_USER_USED_STORAGE, {
user: user._id,
amount: chunkSize
amount: stat.size
})
} catch (err) {
console.error(`failed to update user used storage: ${err.stack}`)
Expand All @@ -200,7 +197,7 @@ export async function carPost (request, env, ctx) {
tasks.push(async () => {
let dagSize
try {
dagSize = await getDagSize(reader)
dagSize = await getDagSize(bytes)
} catch (err) {
console.error(`could not determine DAG size: ${err.stack}`)
return
Expand Down Expand Up @@ -263,28 +260,70 @@ export async function sizeOf (response) {
}

/**
* Returns the sum of all block sizes in the received CAR. Throws if any block
* is bigger than MAX_BLOCK_SIZE (1MiB).
* @param {CarReader} reader
* Returns the sum of all block sizes and total blocks. Throws if the CAR does
* not conform to our idea of a valid CAR i.e.
* - Missing root CIDs
* - >1 root CID
* - Any block bigger than MAX_BLOCK_SIZE (1MiB)
* - 0 blocks
* - Missing root block
* - Missing non-root blocks (when root block has links)
*
* @typedef {{ size: number, blocks: number }} CarStat
* @param {Uint8Array} carBytes
* @returns {Promise<CarStat>}
*/
async function getBlocksSize (reader) {
async function carStat (carBytes) {
// Single pass over the CAR: validate it while accumulating the block count
// and the sum of block byte lengths that are returned as { size, blocks }.
const blocksIterator = await CarBlockIterator.fromBytes(carBytes)
const roots = await blocksIterator.getRoots()
// Exactly one root CID is accepted: zero and more-than-one both throw.
if (roots.length === 0) {
throw new Error('missing roots')
}
if (roots.length > 1) {
throw new Error('too many roots')
}
const rootCid = roots[0]
// Holds the first block whose CID equals rootCid; stays undefined if no such
// block is encountered during the scan.
let rawRootBlock
// Running totals: number of blocks seen and sum of their byteLength values.
let blocks = 0
let size = 0
// NOTE(review): `reader` is not declared in this function and the following
// line opens the same loop over blocksIterator - this line looks like the
// removed side of the diff rendering; confirm against the merged file.
for await (const block of reader.blocks()) {
for await (const block of blocksIterator) {
const blockSize = block.bytes.byteLength
// Reject the whole CAR as soon as one oversized block is seen.
if (blockSize > MAX_BLOCK_SIZE) {
throw new Error(`block too big: ${blockSize} > ${MAX_BLOCK_SIZE}`)
}
// Remember only the first block matching the root CID.
if (!rawRootBlock && block.cid.equals(rootCid)) {
rawRootBlock = block
}
size += blockSize
blocks++
}
// NOTE(review): returning here would make every check below unreachable and
// does not match the { size, blocks } result returned at the end - this line
// looks like the removed side of the diff rendering; confirm.
return size
if (blocks === 0) {
throw new Error('empty CAR')
}
if (!rawRootBlock) {
throw new Error('missing root block')
}
// The link check is only performed when the CAR holds exactly one block
// (which, given the check above, is the root block).
if (blocks === 1) {
// Pick the codec from the module-level `decoders` list whose code matches
// the root CID's codec code.
const decoder = decoders.find(d => d.code === rootCid.code)
// if we can't decode, we can't check this...
if (decoder) {
// Decode the root bytes so that its links can be enumerated.
const rootBlock = new Block({ cid: rootCid, bytes: rawRootBlock.bytes, value: decoder.decode(rawRootBlock.bytes) })
const numLinks = Array.from(rootBlock.links()).length
// if the root block has links, then we should have at least 2 blocks in the CAR
if (numLinks > 0) {
throw new Error('CAR must contain at least one non-root block')
}
}
}
return { size, blocks }
}

/**
* Returns the DAG size of the CAR but only if the graph is complete.
* @param {CarReader} reader
* @param {Uint8Array} carBytes
*/
async function getDagSize (reader) {
const decoders = [pb, raw, cbor]
async function getDagSize (carBytes) {
const reader = await CarReader.fromBytes(carBytes)
const [rootCid] = await reader.getRoots()

const getBlock = async cid => {
Expand Down
98 changes: 97 additions & 1 deletion packages/api/test/car.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,102 @@ describe('POST /car', () => {

assert.strictEqual(res.ok, false)
const { message } = await res.json()
assert.ok(message.includes('empty CAR'))
assert.strictEqual(message, 'empty CAR')
})

it('should throw for CAR with no roots', async () => {
  const token = await getTestJWT()

  // The CAR carries a single empty dag-pb node...
  const bytes = pb.encode({ Data: new Uint8Array(), Links: [] })
  const cid = CID.create(1, pb.code, await sha256.digest(bytes))

  // ...but its header declares an empty list of roots.
  const { writer, out: carStream } = CarWriter.create([])
  writer.put({ cid, bytes })
  writer.close()

  // Drain the writer output into the parts of the request body.
  const carParts = []
  for await (const part of carStream) {
    carParts.push(part)
  }

  const requestInit = {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${token}`,
      'Content-Type': 'application/car'
    },
    body: new Blob(carParts)
  }
  const res = await fetch(new URL('car', endpoint), requestInit)

  // The upload must be rejected with the exact validation message.
  assert.strictEqual(res.ok, false)
  const payload = await res.json()
  assert.strictEqual(payload.message, 'missing roots')
})

it('should throw for CAR with multiple roots', async () => {
  const token = await getTestJWT()

  // The CAR carries a single empty dag-pb node...
  const bytes = pb.encode({ Data: new Uint8Array(), Links: [] })
  const cid = CID.create(1, pb.code, await sha256.digest(bytes))

  // ...but its header declares two roots: that node plus a second CID.
  const declaredRoots = [
    cid,
    CID.parse('bafybeibqmrg5e5bwhx2ny4kfcjx2mm3ohh2cd4i54wlygquwx7zbgwqs4e')
  ]
  const { writer, out: carStream } = CarWriter.create(declaredRoots)
  writer.put({ cid, bytes })
  writer.close()

  // Drain the writer output into the parts of the request body.
  const carParts = []
  for await (const part of carStream) {
    carParts.push(part)
  }

  const requestInit = {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${token}`,
      'Content-Type': 'application/car'
    },
    body: new Blob(carParts)
  }
  const res = await fetch(new URL('car', endpoint), requestInit)

  // The upload must be rejected with the exact validation message.
  assert.strictEqual(res.ok, false)
  const payload = await res.json()
  assert.strictEqual(payload.message, 'too many roots')
})

it('should throw for CAR with one root block that has links', async () => {
  const token = await getTestJWT()

  // Root node links to a CID whose block is never written to the CAR.
  const linkedCid = CID.parse('bafybeibqmrg5e5bwhx2ny4kfcjx2mm3ohh2cd4i54wlygquwx7zbgwqs4e')
  const bytes = pb.encode({
    Data: new Uint8Array(),
    Links: [{ Hash: linkedCid }]
  })
  const cid = CID.create(1, pb.code, await sha256.digest(bytes))

  // The root block is the only block put into the CAR.
  const { writer, out: carStream } = CarWriter.create(cid)
  writer.put({ cid, bytes })
  writer.close()

  // Drain the writer output into the parts of the request body.
  const carParts = []
  for await (const part of carStream) {
    carParts.push(part)
  }

  const requestInit = {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${token}`,
      'Content-Type': 'application/car'
    },
    body: new Blob(carParts)
  }
  const res = await fetch(new URL('car', endpoint), requestInit)

  // The upload must be rejected with the exact validation message.
  assert.strictEqual(res.ok, false)
  const payload = await res.json()
  assert.strictEqual(payload.message, 'CAR must contain at least one non-root block')
})
})