Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: set cache-control header correctly #19

Merged
merged 9 commits into from
Mar 21, 2024
5 changes: 3 additions & 2 deletions packages/verified-fetch/src/utils/parse-resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ export async function parseResource (resource: Resource, { ipns, logger }: Parse
cid,
protocol: 'ipfs',
path: '',
query: {}
}
query: {},
ttl: 29030400 // 1 year for ipfs content
} satisfies ParsedUrlStringResults
}

throw new TypeError(`Invalid resource. Cannot determine CID from resource: ${resource}`)
Expand Down
100 changes: 81 additions & 19 deletions packages/verified-fetch/src/utils/parse-url-string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ import { peerIdFromString } from '@libp2p/peer-id'
import { CID } from 'multiformats/cid'
import { TLRU } from './tlru.js'
import type { RequestFormatShorthand } from '../types.js'
import type { IPNS, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns'
import type { DNSLinkResolveResult, IPNS, IPNSResolveResult, ResolveDNSLinkProgressEvents, ResolveResult } from '@helia/ipns'
import type { ComponentLogger } from '@libp2p/interface'
import type { ProgressOptions } from 'progress-events'

const ipnsCache = new TLRU<ResolveResult>(1000)
const ipnsCache = new TLRU<DNSLinkResolveResult | IPNSResolveResult>(1000)

export interface ParseUrlStringInput {
urlString: string
Expand All @@ -23,30 +23,80 @@ export interface ParsedUrlQuery extends Record<string, string | unknown> {
filename?: string
}

export interface ParsedUrlStringResults {
protocol: string
path: string
cid: CID
interface ParsedUrlStringResultsBase extends ResolveResult {
protocol: 'ipfs' | 'ipns'
query: ParsedUrlQuery

/**
* seconds as a number
*/
ttl?: number
}

export type ParsedUrlStringResults = ParsedUrlStringResultsBase

const URL_REGEX = /^(?<protocol>ip[fn]s):\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const PATH_REGEX = /^\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const PATH_GATEWAY_REGEX = /^https?:\/\/(.*[^/])\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
const SUBDOMAIN_GATEWAY_REGEX = /^https?:\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\.(?<protocol>ip[fn]s)\.([^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/

function matchURLString (urlString: string): Record<string, string> {
/**
 * The named capture groups produced by the URL patterns in this file once
 * they have been validated by `matchUrlGroupsGuard`.
 */
interface MatchUrlGroups {
  /** Which scheme the URL addressed: `ipfs` or `ipns`. */
  protocol: 'ipfs' | 'ipns'
  /** The raw identifier segment (a CID, PeerId or DNSLink label) — not yet parsed. */
  cidOrPeerIdOrDnsLink: string
  /** Anything captured after the identifier and before the query string, if present. */
  path?: string
  /** The raw query string captured after `?`, if present. */
  queryString?: string
}

/**
 * Type guard that checks whether a regex `groups` object has the shape of
 * `MatchUrlGroups`.
 *
 * Returns `true` only when `protocol` is `'ipfs'` or `'ipns'`,
 * `cidOrPeerIdOrDnsLink` is a string, and `path` / `queryString` are each
 * either absent or a string. `null`/`undefined` input yields `false`.
 */
function matchUrlGroupsGuard (groups?: null | { [key in string]: string; } | MatchUrlGroups): groups is MatchUrlGroups {
  if (groups == null) {
    return false
  }

  const { protocol, cidOrPeerIdOrDnsLink, path, queryString } = groups

  // both required groups must be present before anything else is inspected
  if (protocol == null || cidOrPeerIdOrDnsLink == null) {
    return false
  }

  if (protocol !== 'ipns' && protocol !== 'ipfs') {
    return false
  }

  if (typeof cidOrPeerIdOrDnsLink !== 'string') {
    return false
  }

  // optional groups are acceptable when missing, otherwise they must be strings
  const optionalGroups: unknown[] = [path, queryString]
  return optionalGroups.every((value) => value == null || typeof value === 'string')
}

function matchURLString (urlString: string): MatchUrlGroups {
for (const pattern of [URL_REGEX, PATH_REGEX, PATH_GATEWAY_REGEX, SUBDOMAIN_GATEWAY_REGEX]) {
const match = urlString.match(pattern)

if (match?.groups != null) {
return match.groups
if (matchUrlGroupsGuard(match?.groups)) {
return match.groups satisfies MatchUrlGroups
}
}

throw new TypeError(`Invalid URL: ${urlString}, please use ipfs://, ipns://, or gateway URLs only`)
}

/**
 * Determines the TTL for the resolved resource that will be used for the
 * `Cache-Control` header's `max-age` directive. `max-age` is in seconds.
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
 *
 * Two sources are consulted, in priority order:
 *
 * 1. `answer.TTL` of a DNSLink resolve result, returned as-is
 *    (presumably already in seconds — confirm against the resolver).
 * 2. `record.ttl` of an IPNS resolve result, a BigInt representing
 *    "nanoseconds" which is converted back to seconds.
 *
 * For more TTL nuances:
 *
 * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200
 * @see https://github.com/ipfs/js-ipns/pull/308
 *
 * @param resolveResult - the IPNS or DNSLink resolve result, if any
 * @returns the TTL as a number, or `undefined` when there is no resolve
 * result or it carries no TTL
 */
function calculateTtl (resolveResult?: IPNSResolveResult | DNSLinkResolveResult): number | undefined {
  if (resolveResult == null) {
    return undefined
  }

  const dnsLinkTtl = (resolveResult as DNSLinkResolveResult).answer?.TTL
  const ipnsTtlNs = (resolveResult as IPNSResolveResult).record?.ttl
  // For some reason, ipns "nanoseconds" are 1e-8 of a second, instead of 1e-9,
  // so the divisor is intentionally 1e8. BigInt division truncates toward zero.
  const ipnsTtl = ipnsTtlNs != null ? Number(ipnsTtlNs / BigInt(1e8)) : undefined

  // a DNSLink TTL wins when both are somehow present
  return dnsLinkTtl ?? ipnsTtl
}

/**
* For dnslinks see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header
* DNSLink names include . which means they must be inlined into a single DNS label to provide unique origin and work with wildcard TLS certificates.
Expand Down Expand Up @@ -89,32 +139,36 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
let cid: CID | undefined
let resolvedPath: string | undefined
const errors: Error[] = []
let resolveResult: IPNSResolveResult | DNSLinkResolveResult | undefined

if (protocol === 'ipfs') {
try {
cid = CID.parse(cidOrPeerIdOrDnsLink)
/**
* no ttl set. @link {setCacheControlHeader}
*/
} catch (err) {
log.error(err)
errors.push(new TypeError('Invalid CID for ipfs://<cid> URL'))
}
} else {
let resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink)
// protocol is ipns
resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink)

if (resolveResult != null) {
cid = resolveResult.cid
resolvedPath = resolveResult.path
log.trace('resolved %s to %c from cache', cidOrPeerIdOrDnsLink, cid)
} else {
// protocol is ipns
log.trace('attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink)
log.trace('Attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink)
let peerId = null
try {
// try resolving as an IPNS name
peerId = peerIdFromString(cidOrPeerIdOrDnsLink)
resolveResult = await ipns.resolve(peerId, { onProgress: options?.onProgress })
cid = resolveResult?.cid
resolvedPath = resolveResult?.path
cid = resolveResult.cid
resolvedPath = resolveResult.path
log.trace('resolved %s to %c', cidOrPeerIdOrDnsLink, cid)
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2)
} catch (err) {
if (peerId == null) {
log.error('could not parse PeerId string "%s"', cidOrPeerIdOrDnsLink, err)
Expand All @@ -126,6 +180,7 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
}

if (cid == null) {
// cid is still null, try resolving as a DNSLink
let decodedDnsLinkLabel = cidOrPeerIdOrDnsLink
if (isInlinedDnsLink(cidOrPeerIdOrDnsLink)) {
decodedDnsLinkLabel = dnsLinkLabelDecoder(cidOrPeerIdOrDnsLink)
Expand All @@ -138,7 +193,6 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
cid = resolveResult?.cid
resolvedPath = resolveResult?.path
log.trace('resolved %s to %c', decodedDnsLinkLabel, cid)
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, 60 * 1000 * 2)
} catch (err: any) {
log.error('could not resolve DnsLink for "%s"', cidOrPeerIdOrDnsLink, err)
errors.push(err)
Expand All @@ -155,6 +209,13 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
throw new AggregateError(errors, `Invalid resource. Cannot determine CID from URL "${urlString}"`)
}

const ttl = calculateTtl(resolveResult)

if (resolveResult != null) {
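// use the ttl for the resolved resource for the cache, but fall back to 2 minutes if not available
// NOTE(review): calculateTtl documents `ttl` as seconds, while the fallback (60 * 1000 * 2) is milliseconds — confirm which unit ipnsCache.set expects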
ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, ttl ?? 60 * 1000 * 2)
}

// parse query string
const query: Record<string, any> = {}

Expand All @@ -177,9 +238,10 @@ export async function parseUrlString ({ urlString, ipns, logger }: ParseUrlStrin
return {
protocol,
cid,
path: joinPaths(resolvedPath, urlPath),
query
}
path: joinPaths(resolvedPath, urlPath ?? ''),
query,
ttl
} satisfies ParsedUrlStringResults
}

/**
Expand Down
36 changes: 36 additions & 0 deletions packages/verified-fetch/src/utils/response-headers.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,39 @@
/**
 * Arguments accepted by `setCacheControlHeader`.
 */
interface CacheControlHeaderOptions {
  /**
   * Lifetime of the resolved resource, expressed in seconds as a number.
   * Used verbatim as the `max-age` directive for `ipns` responses.
   *
   * See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
   */
  ttl?: number

  /**
   * Protocol the resource was requested with; selects the caching policy.
   */
  protocol: 'ipfs' | 'ipns'

  /**
   * The response whose `cache-control` header is overwritten in place.
   */
  response: Response
}

/**
 * Writes the `cache-control` header on `response` based on the protocol and TTL:
 *
 * - `ipfs`: a fixed `max-age` plus `immutable`; the supplied `ttl` is ignored.
 * - `ipns` without a TTL: a 5 minute default ("use 5 minute as default
 *   fallback when it is not available.",
 *   see https://github.com/ipfs/boxo/issues/329#issuecomment-1995236409).
 * - `ipns` with a TTL: the TTL is used as `max-age`.
 *
 * Any existing `cache-control` value on the response is replaced.
 *
 * @see https://specs.ipfs.tech/http-gateways/path-gateway/#cache-control-response-header
 */
export function setCacheControlHeader ({ ttl, protocol, response }: CacheControlHeaderOptions): void {
  if (protocol === 'ipfs') {
    response.headers.set('cache-control', 'public, max-age=29030400, immutable')
    return
  }

  // mutable (ipns) content: honour the known TTL, otherwise use the default limit
  const mutableHeaderValue = ttl == null ? 'public, max-age=300' : `public, max-age=${ttl}`

  response.headers.set('cache-control', mutableHeaderValue)
}

/**
* This function returns the value of the `Content-Range` header for a given range.
* If you know the total size of the body, pass it as `byteSize`
Expand Down
8 changes: 7 additions & 1 deletion packages/verified-fetch/src/verified-fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { getETag } from './utils/get-e-tag.js'
import { getStreamFromAsyncIterable } from './utils/get-stream-from-async-iterable.js'
import { tarStream } from './utils/get-tar-stream.js'
import { parseResource } from './utils/parse-resource.js'
import { setCacheControlHeader } from './utils/response-headers.js'
import { badRequestResponse, movedPermanentlyResponse, notAcceptableResponse, notSupportedResponse, okResponse, badRangeResponse, okRangeResponse, badGatewayResponse } from './utils/responses.js'
import { selectOutputType, queryFormatToAcceptHeader } from './utils/select-output-type.js'
import { walkPath } from './utils/walk-path.js'
Expand Down Expand Up @@ -441,11 +442,15 @@ export class VerifiedFetch {
let cid: ParsedUrlStringResults['cid']
let path: ParsedUrlStringResults['path']
let query: ParsedUrlStringResults['query']
let ttl: ParsedUrlStringResults['ttl']
let protocol: ParsedUrlStringResults['protocol']
try {
const result = await parseResource(resource, { ipns: this.ipns, logger: this.helia.logger }, options)
cid = result.cid
path = result.path
query = result.query
ttl = result.ttl
protocol = result.protocol
} catch (err) {
this.log.error('error parsing resource %s', resource, err)

Expand Down Expand Up @@ -516,7 +521,8 @@ export class VerifiedFetch {
}

response.headers.set('etag', getETag({ cid, reqFormat, weak: false }))
response.headers.set('cache-control', 'public, max-age=29030400, immutable')

setCacheControlHeader({ response, ttl, protocol })
// https://specs.ipfs.tech/http-gateways/path-gateway/#x-ipfs-path-response-header
response.headers.set('X-Ipfs-Path', resource.toString())

Expand Down
Loading
Loading