From 389d652392fe0ce4df24873d1dfe18eef68f9374 Mon Sep 17 00:00:00 2001 From: Oli Evans Date: Fri, 16 Dec 2022 10:21:49 +0000 Subject: [PATCH] feat: fast 304 for `if-none-match` requests (#120) * feat: fast 304 for if-none-match requests where the request provides `if-none-match` header as a cid etag and it matches the cid requested and there is no path, we have all the info we need to return a "304 not modified". fix bugs where response.ok is only true for 200-299 response, but we may get 304 from upstream and thats ok too. see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match see: https://developer.mozilla.org/en-US/docs/Web/API/Response/ok License: MIT Signed-off-by: Oli Evans --- packages/edge-gateway/src/constants.js | 2 + packages/edge-gateway/src/gateway.js | 23 ++++++- packages/edge-gateway/test/index.spec.js | 68 +++++++++++++++++++ .../test/mocks/ipfs.io/get_ipfs#@cid#path.js | 19 ++++++ packages/ipfs-gateway-race/lib/index.js | 2 +- packages/ipfs-gateway-race/test/index.spec.js | 24 +++++++ .../test/mocks/cf-ipfs.com/get_ipfs#@cid.js | 20 +++++- .../test/mocks/ipfs.io/get_ipfs#@cid.js | 20 +++++- 8 files changed, 168 insertions(+), 10 deletions(-) diff --git a/packages/edge-gateway/src/constants.js b/packages/edge-gateway/src/constants.js index 6e3dccb..df90b6f 100644 --- a/packages/edge-gateway/src/constants.js +++ b/packages/edge-gateway/src/constants.js @@ -4,6 +4,7 @@ export const CF_CACHE_MAX_OBJECT_SIZE = 512 * Math.pow(1024, 2) // 512MB to byte * @type {Record} */ export const RESOLUTION_LAYERS = { + SHORTCUT: 'shortcut', CDN: 'cdn', DOTSTORAGE_RACE: 'dotstorage-race', PUBLIC_RACE_L1: 'public-race-l1', @@ -11,6 +12,7 @@ export const RESOLUTION_LAYERS = { } export const RESOLUTION_IDENTIFIERS = { + IF_NONE_MATCH: 'if-none-match', CACHE_ZONE: 'cache-zone', PERMA_CACHE: 'perma-cache' } diff --git a/packages/edge-gateway/src/gateway.js b/packages/edge-gateway/src/gateway.js index 0e29c24..e6f6a6e 100644 --- a/packages/edge-gateway/src/gateway.js +++ b/packages/edge-gateway/src/gateway.js @@ -21,7 +21,7 @@ import { /** * @typedef {import('./env').Env} Env - * @typedef {'cdn' | 'dotstorage-race' | 'public-race-l1' | 'public-race-l2'} ResolutionLayer + * @typedef {'shortcut' | 'cdn' | 'dotstorage-race' | 'public-race-l1' | 'public-race-l2'} ResolutionLayer * * @typedef {import('ipfs-gateway-race').GatewayResponse} GatewayResponse * @typedef {import('ipfs-gateway-race').GatewayResponsePromise} GatewayResponsePromise @@ -61,6 +61,23 @@ export async function gatewayGet (request, env, ctx) { const cid = await getCidFromSubdomainUrl(reqUrl) const pathname = reqUrl.pathname + // return 304 "not modified" response if user sends us a cid etag in `if-none-match` + const reqEtag = request.headers.get('if-none-match') + if (reqEtag && (pathname === '' || pathname === '/')) { + const etag = `"${cid}"` + const weakEtag = `W/${etag}` + if (reqEtag === etag || reqEtag === weakEtag) { + const res = new Response(null, { + status: 304, + headers: new Headers({ + 'cache-control': 'public, max-age=29030400, immutable', + etag + }) + }) + return getResponseWithCustomHeaders(res, RESOLUTION_LAYERS.SHORTCUT, RESOLUTION_IDENTIFIERS.IF_NONE_MATCH) + } + } + const cidDenylistResponse = await env.CID_VERIFIER.fetch(`${env.CID_VERIFIER_URL}/denylist?cid=${cid}`) if (cidDenylistResponse.status !== 204) { return cidDenylistResponse @@ -207,7 +224,7 @@ async function getFromDotstorage (request, env, cid, options = {}) { headers }) - if (!response.ok) { + if (!response.ok && response.status !== 304) { throw new Error() } @@ -223,7 +240,7 @@ async function getFromDotstorage (request, env, cid, options = {}) { }) // @ts-ignore 'response' does not exist on type 'GatewayResponseFailure' - if (!gwResponse?.response.ok) { + if (!gwResponse?.response.ok && gwResponse?.response.status !== 304) { throw new Error() } diff --git a/packages/edge-gateway/test/index.spec.js b/packages/edge-gateway/test/index.spec.js index b4a8e64..6e644b0 100644 --- a/packages/edge-gateway/test/index.spec.js +++ b/packages/edge-gateway/test/index.spec.js @@ -114,3 +114,71 @@ test('Gets response error when all fail to resolve', async (t) => { const body = await response.text() t.assert(body) }) + +test('Gets shortcut 304 response when if-none-match request header sent', async (t) => { + const { mf } = t.context + const cidStr = 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' + const response = await mf.dispatchFetch(`https://${cidStr}.ipfs.localhost:8787`, { + headers: { + 'if-none-match': `"${cidStr}"` + } + }) + await response.waitUntil() + t.is(response.status, 304) + t.is(response.headers.get('etag'), `"${cidStr}"`) + t.is(response.headers.get('x-dotstorage-resolution-layer'), 'shortcut') + t.is(response.headers.get('x-dotstorage-resolution-id'), 'if-none-match') + const body = await response.text() + t.is(body, '') +}) + +test('Gets shortcut 304 response when if-none-match request header sent with weak etag', async (t) => { + const { mf } = t.context + const cidStr = 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' + const response = await mf.dispatchFetch(`https://${cidStr}.ipfs.localhost:8787`, { + headers: { + 'if-none-match': `W/"${cidStr}"` + } + }) + await response.waitUntil() + t.is(response.status, 304) + t.is(response.headers.get('etag'), `"${cidStr}"`) + t.is(response.headers.get('x-dotstorage-resolution-layer'), 'shortcut') + t.is(response.headers.get('x-dotstorage-resolution-id'), 'if-none-match') + const body = await response.text() + t.is(body, '') +}) + +test('No 304 response when if-none-match request header sent with weak bad etag', async (t) => { + const { mf } = t.context + const cidStr = 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' + const response = await mf.dispatchFetch(`https://${cidStr}.ipfs.localhost:8787`, { + headers: { + 'if-none-match': `W/"${cidStr.substring(cidStr.length - 2)}"` + } + }) + await response.waitUntil() + t.not(response.status, 304) + t.not(response.headers.get('x-dotstorage-resolution-layer'), 'shortcut') + t.not(response.headers.get('x-dotstorage-resolution-id'), 'if-none-match') +}) + +test('Gets 304 response from upstream when if-none-match request header sent with path', async (t) => { + const { mf } = t.context + const root = 'bafybeiaekuoonpqpmems3uapy27zsas5p6ylku53lzkaufnvt4s5n6a7au' + const child = 'bafkreib6uzgr2noyzup3uuqcp6gafddnx6n3iinkyflbrkhdhfpcoggc5u' + const path = '/sample.html' + const response = await mf.dispatchFetch(`https://${root}.ipfs.localhost:8787${path}`, { + headers: { + 'if-none-match': `W/"${child}"` + } + }) + await response.waitUntil() + t.is(response.status, 304) + // TODO: why is etag not set on 304 response? + // t.is(response.headers.get('etag'), `"${child}"`) + t.not(response.headers.get('x-dotstorage-resolution-layer'), 'shortcut') + t.not(response.headers.get('x-dotstorage-resolution-id'), 'if-none-match') + const body = await response.text() + t.is(body, '') +}) diff --git a/packages/edge-gateway/test/mocks/ipfs.io/get_ipfs#@cid#path.js b/packages/edge-gateway/test/mocks/ipfs.io/get_ipfs#@cid#path.js index ff005b8..0f1b8f7 100644 --- a/packages/edge-gateway/test/mocks/ipfs.io/get_ipfs#@cid#path.js +++ b/packages/edge-gateway/test/mocks/ipfs.io/get_ipfs#@cid#path.js @@ -13,6 +13,25 @@ module.exports = (request) => { } } + if (new URL(request.url).pathname === '/ipfs/bafybeigdcrbrc7rzrphb6d4jgvajtq27mlzb7pnutakzvrpq3bnkznl6em/repeat.txt') { + if (request.headers['if-none-match'] === 'W/"bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga"') { + return { + statusCode: 304, + headers: { + etag: 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' + }, + body: '' + } + } + return { + statusCode: 200, + headers: { + etag: 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' + }, + body: '🔁' + } + } + return { statusCode: 200, headers: { diff --git a/packages/ipfs-gateway-race/lib/index.js b/packages/ipfs-gateway-race/lib/index.js index 0f3cd76..2c312de 100644 --- a/packages/ipfs-gateway-race/lib/index.js +++ b/packages/ipfs-gateway-race/lib/index.js @@ -69,7 +69,7 @@ export class IpfsGatewayRacer { try { winnerGwResponse = await pAny(gatewayResponsePromises, { // @ts-ignore 'response' does not exist on type 'GatewayResponseFailure' - filter: (res) => res.response?.ok + filter: (res) => res.response?.ok || res.response?.status === 304 }) // Abort race contestants once race has a winner diff --git a/packages/ipfs-gateway-race/test/index.spec.js b/packages/ipfs-gateway-race/test/index.spec.js index 2efe9e6..02ad4f5 100644 --- a/packages/ipfs-gateway-race/test/index.spec.js +++ b/packages/ipfs-gateway-race/test/index.spec.js @@ -1,5 +1,6 @@ import { test } from './utils/setup.js' import { addFixtures } from './utils/fixtures.js' +import { Headers } from '@web-std/fetch' import { GenericContainer, Wait } from 'testcontainers' import pDefer from 'p-defer' import pSettle from 'p-settle' @@ -54,6 +55,29 @@ test('Gets response from cid and pathname', async (t) => { t.is(await response.text(), 'Hello gateway.nft.storage resource!') }) +/** + * Return on a 304 (from upstream or directly from our cache) where a + * valid `if-none-match: ` header is sent on the request. + * + * An agent will send `if-none-match: ` when they previously requested + * the content and we provided the cid as an etag on the response. + * + * > The If-None-Match HTTP request header makes the request conditional... + * > When the condition fails for GET and HEAD methods, then the server + * > must return HTTP status code 304 (Not Modified) + * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match + */ +test('Gets 304 response from cid and valid if-none-match header', async (t) => { + const { gwRacer } = t.context + const cid = 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' + const headers = new Headers({ 'if-none-match': `"${cid}"` }) + const response = await gwRacer.get(cid, { headers }) + + t.assert(response) + t.is(response.status, 304) + t.is(await response.text(), '', '304 body should be empty') +}) + test('Aborts other race contestants once there is a winner', async (t) => { const { gwRacer } = t.context const defer = pDefer() diff --git a/packages/ipfs-gateway-race/test/mocks/cf-ipfs.com/get_ipfs#@cid.js b/packages/ipfs-gateway-race/test/mocks/cf-ipfs.com/get_ipfs#@cid.js index a396e5f..e01e239 100644 --- a/packages/ipfs-gateway-race/test/mocks/cf-ipfs.com/get_ipfs#@cid.js +++ b/packages/ipfs-gateway-race/test/mocks/cf-ipfs.com/get_ipfs#@cid.js @@ -15,9 +15,9 @@ module.exports = async ({ params, headers }) => { headers: responseHeaders, body: 'Hello dot.storage! 😎' } - } else if ( - cid === 'bafkreibehzafi6gdvlyue5lzxa3rfobvp452kylox6f4vwqpd4xbr53uqu' - ) { + } + + if (cid === 'bafkreibehzafi6gdvlyue5lzxa3rfobvp452kylox6f4vwqpd4xbr53uqu') { // Delays 300ms await new Promise((resolve) => setTimeout(resolve, 500)) return { @@ -27,6 +27,20 @@ module.exports = async ({ params, headers }) => { } } + if ( + cid === 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' && + headers['if-none-match'] === `"${cid}"` + ) { + return { + statusCode: 304, + body: undefined, // smoke ignores statusCode if body is not present! + headers: { + etag: cid, + 'cache-control': 'public, max-age=29030400, immutable' + } + } + } + return { statusCode: 524, headers: responseHeaders, diff --git a/packages/ipfs-gateway-race/test/mocks/ipfs.io/get_ipfs#@cid.js b/packages/ipfs-gateway-race/test/mocks/ipfs.io/get_ipfs#@cid.js index 9593727..e0d5bb5 100644 --- a/packages/ipfs-gateway-race/test/mocks/ipfs.io/get_ipfs#@cid.js +++ b/packages/ipfs-gateway-race/test/mocks/ipfs.io/get_ipfs#@cid.js @@ -15,9 +15,9 @@ module.exports = async ({ params, headers }) => { headers: responseHeaders, body: 'Hello dot.storage! 😎' } - } else if ( - cid === 'bafkreibehzafi6gdvlyue5lzxa3rfobvp452kylox6f4vwqpd4xbr53uqu' - ) { + } + + if (cid === 'bafkreibehzafi6gdvlyue5lzxa3rfobvp452kylox6f4vwqpd4xbr53uqu') { return { statusCode: 200, headers: responseHeaders, @@ -25,6 +25,20 @@ module.exports = async ({ params, headers }) => { } } + if ( + cid === 'bafkreidwgoyc2f7n5vmwbcabbckwa6ejes4ujyncyq6xec5gt5nrm5hzga' && + headers['if-none-match'] === `"${cid}"` + ) { + return { + statusCode: 304, + body: undefined, // smoke ignores statusCode if body is not present! + headers: { + etag: cid, + 'cache-control': 'public, max-age=29030400, immutable' + } + } + } + return { statusCode: 500, headers: responseHeaders