Skip to content

Commit

Permalink
feat: add etag based on params for car
Browse files Browse the repository at this point in the history
- removes duplicate TAR format ETag check
- getTag now adds W/ for TAR and CAR
- adds check for dag index etags
  • Loading branch information
hacdias committed May 31, 2023
1 parent 3f29917 commit 7647b01
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 88 deletions.
59 changes: 40 additions & 19 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) {

// Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified
// TODO: Handle If-None-Match for CAR files once what goes in the ETag is resolved
ifNoneMatchResolvedPath, ok := i.handleIfNoneMatch(w, r, responseFormat, contentPath, immutableContentPath, logger)
ifNoneMatchResolvedPath, ok := i.handleIfNoneMatch(w, r, responseFormat, contentPath, immutableContentPath)
if !ok {
return
}
Expand Down Expand Up @@ -413,9 +413,12 @@ func panicHandler(w http.ResponseWriter) {
}
}

func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid) (modtime time.Time) {
// Set Etag to based on CID (override whatever was set before)
w.Header().Set("Etag", getEtag(r, fileCid))
func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, cid cid.Cid, responseFormat string) (modtime time.Time) {
// Best effort attempt to set an Etag based on the CID and response format.
// Setting an ETag is handled separately for CARs and IPNS records.
if etag := getEtag(r, cid, responseFormat); etag != "" {
w.Header().Set("Etag", etag)
}

// Set Cache-Control and Last-Modified based on contentPath properties
if contentPath.Mutable() {
Expand Down Expand Up @@ -519,9 +522,9 @@ func getFilename(contentPath ipath.Path) string {
}

// etagMatch evaluates if we can respond with HTTP 304 Not Modified
// It supports multiple weak and strong etags passed in If-None-Matc stringh
// It supports multiple weak and strong etags passed in If-None-Match string
// including the wildcard one.
func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool {
func etagMatch(ifNoneMatchHeader string, etagsToCheck ...string) bool {
buf := ifNoneMatchHeader
for {
buf = textproto.TrimString(buf)
Expand All @@ -541,9 +544,12 @@ func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool {
break
}
// Check for match both strong and weak etags
if etagWeakMatch(etag, cidEtag) || etagWeakMatch(etag, dirEtag) {
return true
for _, etagToCheck := range etagsToCheck {
if etagWeakMatch(etag, etagToCheck) {
return true
}
}

buf = remain
}
return false
Expand Down Expand Up @@ -583,19 +589,31 @@ func etagWeakMatch(a, b string) bool {
return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/")
}

// generate Etag value based on HTTP request and CID
func getEtag(r *http.Request, cid cid.Cid) string {
// getEtag generates an ETag value based on an HTTP Request, a CID and a response
// format. This function DOES NOT generate ETags for CARs or IPNS Records.
func getEtag(r *http.Request, cid cid.Cid, responseFormat string) string {
prefix := `"`
suffix := `"`
responseFormat, _, err := customResponseFormat(r)
if err == nil && responseFormat != "" {

switch responseFormat {
case "":
// Do nothing.
case "application/vnd.ipld.car", "application/vnd.ipfs.ipns-record":
// CARs and IPNS Record ETags are handled differently, in their respective handler.
return ""
case "application/x-tar":
// Weak Etag W/ for formats that we can't guarantee byte-for-byte identical
// responses, but still want to benefit from HTTP Caching.
prefix = "W/" + prefix
fallthrough
default:
// application/vnd.ipld.foo → foo
// application/x-bar → x-bar
shortFormat := responseFormat[strings.LastIndexAny(responseFormat, "/.")+1:]
// Etag: "cid.shortFmt" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + shortFormat + suffix
}
// TODO: include selector suffix when https://github.com/ipfs/kubo/issues/8769 lands

return prefix + cid.String() + suffix
}

Expand Down Expand Up @@ -664,9 +682,9 @@ func debugStr(path string) string {
return q
}

func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, imPath ImmutablePath, logger *zap.SugaredLogger) (ipath.Resolved, bool) {
func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, imPath ImmutablePath) (ipath.Resolved, bool) {
// Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified
if inm := r.Header.Get("If-None-Match"); inm != "" {
if ifNoneMatch := r.Header.Get("If-None-Match"); ifNoneMatch != "" {
pathMetadata, err := i.api.ResolvePath(r.Context(), imPath)
if err != nil {
// Note: webError will replace http.StatusInternalServerError with a more appropriate error (e.g. StatusNotFound, StatusRequestTimeout, StatusServiceUnavailable, etc.) if necessary
Expand All @@ -677,11 +695,14 @@ func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, resp

resolvedPath := pathMetadata.LastSegment
pathCid := resolvedPath.Cid()
// need to check against both File and Dir Etag variants
// because this inexpensive check happens before we do any I/O
cidEtag := getEtag(r, pathCid)

// Checks against the file, dir listing, and dag index Etags.
// This is an inexpensive check, and it happens before we do any I/O.
cidEtag := getEtag(r, pathCid, responseFormat)
dirEtag := getDirListingEtag(pathCid)
if etagMatch(inm, cidEtag, dirEtag) {
dagEtag := getDagIndexEtag(pathCid)

if etagMatch(ifNoneMatch, cidEtag, dirEtag, dagEtag) {
// Finish early if client already has a matching Etag
w.WriteHeader(http.StatusNotModified)
return nil, false
Expand Down
2 changes: 1 addition & 1 deletion gateway/handler_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func (i *handler) serveRawBlock(ctx context.Context, w http.ResponseWriter, r *h
setContentDispositionHeader(w, name, "attachment")

// Set remaining headers
modtime := addCacheControlHeaders(w, r, contentPath, blockCid)
modtime := addCacheControlHeaders(w, r, contentPath, blockCid, "application/vnd.ipld.raw")
w.Header().Set("Content-Type", "application/vnd.ipld.raw")
w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^)

Expand Down
131 changes: 82 additions & 49 deletions gateway/handler_car.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"strings"
"time"

"github.com/cespare/xxhash/v2"
ipath "github.com/ipfs/boxo/coreiface/path"
"github.com/ipfs/go-cid"

Expand Down Expand Up @@ -39,47 +40,13 @@ func (i *handler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.R
return false
}

queryParams := r.URL.Query()
rangeStr, hasRange := queryParams.Get(carRangeBytesKey), queryParams.Has(carRangeBytesKey)
scopeStr, hasScope := queryParams.Get(carTerminalElementTypeKey), queryParams.Has(carTerminalElementTypeKey)

params := CarParams{}
if hasRange {
rng, err := rangeStrToByteRange(rangeStr)
if err != nil {
err = fmt.Errorf("invalid entity-bytes: %w", err)
i.webError(w, r, err, http.StatusBadRequest)
return false
}
params.Range = &rng
}

if hasScope {
switch s := DagScope(scopeStr); s {
case dagScopeEntity, dagScopeAll, dagScopeBlock:
params.Scope = s
default:
err := fmt.Errorf("unsupported dag-scope %s", scopeStr)
i.webError(w, r, err, http.StatusBadRequest)
return false
}
} else {
params.Scope = dagScopeAll
}

carFile, err := i.api.GetCAR(ctx, imPath, params)
if !i.handleRequestErrors(w, r, contentPath, err) {
params, err := getCarParams(r)
if err != nil {
i.webError(w, r, err, http.StatusBadRequest)
return false
}
defer carFile.Close()

imPathStr := imPath.String()
if !strings.HasPrefix(imPathStr, "/ipfs/") {
i.webError(w, r, fmt.Errorf("path does not have /ipfs/ prefix"), http.StatusInternalServerError)
return false
}
firstSegment, _, _ := strings.Cut(imPathStr[6:], "/")
rootCid, err := cid.Decode(firstSegment)
rootCid, err := getCarRootCid(imPath)
if err != nil {
i.webError(w, r, err, http.StatusInternalServerError)
return false
Expand All @@ -97,23 +64,25 @@ func (i *handler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.R
setContentDispositionHeader(w, name, "attachment")

// Set Cache-Control (same logic as for a regular files)
addCacheControlHeaders(w, r, contentPath, rootCid)

// TODO: What Etag to use here, the full path?
// Weak Etag W/ because we can't guarantee byte-for-byte identical
// responses, but still want to benefit from HTTP Caching. Two CAR
// responses for the same CID and selector will be logically equivalent,
// but when CAR is streamed, then in theory, blocks may arrive from
// datastore in non-deterministic order.
etag := `W/` + getEtag(r, rootCid)
addCacheControlHeaders(w, r, contentPath, rootCid, "application/vnd.ipld.car")

// Generate the CAR Etag.
etag := getCarEtag(r, imPath, params, rootCid)
w.Header().Set("Etag", etag)

// Finish early if Etag match
if r.Header.Get("If-None-Match") == etag {
// Terminate early if Etag matches. We cannot rely on handleIfNoneMatch
// since it does not contain the parameter information we retrieve here.
if etagMatch(r.Header.Get("If-None-Match"), etag) {
w.WriteHeader(http.StatusNotModified)
return false
}

carFile, err := i.api.GetCAR(ctx, imPath, params)
if !i.handleRequestErrors(w, r, contentPath, err) {
return false
}
defer carFile.Close()

// Make it clear we don't support range-requests over a car stream
// Partial downloads and resumes should be handled using requests for
// sub-DAGs and IPLD selectors: https://github.com/ipfs/go-ipfs/issues/8769
Expand Down Expand Up @@ -142,6 +111,36 @@ func (i *handler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.R
return true
}

// getCarParams extracts the CAR-specific options from the request's URL query.
//
// The byte range is read from the carRangeBytesKey query key and parsed with
// rangeStrToByteRange; it is left nil when the key is absent. The DAG scope is
// read from the carTerminalElementTypeKey query key and must be one of
// dagScopeEntity, dagScopeAll or dagScopeBlock; it defaults to dagScopeAll when
// the key is absent.
//
// On any invalid value a zero CarParams and a descriptive error are returned.
func getCarParams(r *http.Request) (CarParams, error) {
	query := r.URL.Query()

	var params CarParams

	// Optional byte range.
	if query.Has(carRangeBytesKey) {
		byteRange, err := rangeStrToByteRange(query.Get(carRangeBytesKey))
		if err != nil {
			return CarParams{}, fmt.Errorf("invalid entity-bytes: %w", err)
		}
		params.Range = &byteRange
	}

	// No explicit scope: fall back to the whole DAG.
	if !query.Has(carTerminalElementTypeKey) {
		params.Scope = dagScopeAll
		return params, nil
	}

	// Explicit scope: only the known values are accepted.
	rawScope := query.Get(carTerminalElementTypeKey)
	scope := DagScope(rawScope)
	if scope != dagScopeEntity && scope != dagScopeAll && scope != dagScopeBlock {
		return CarParams{}, fmt.Errorf("unsupported dag-scope %s", rawScope)
	}
	params.Scope = scope

	return params, nil
}

func rangeStrToByteRange(rangeStr string) (DagEntityByteRange, error) {
rangeElems := strings.Split(rangeStr, ":")
if len(rangeElems) != 2 {
Expand Down Expand Up @@ -177,3 +176,37 @@ func rangeStrToByteRange(rangeStr string) (DagEntityByteRange, error) {
To: &to,
}, nil
}

// getCarRootCid returns the CID found in the first segment after the "/ipfs/"
// prefix of imPath. It returns cid.Undef and an error when the path does not
// start with "/ipfs/" or when that first segment cannot be decoded as a CID.
func getCarRootCid(imPath ImmutablePath) (cid.Cid, error) {
	const ipfsPrefix = "/ipfs/"

	fullPath := imPath.String()
	if !strings.HasPrefix(fullPath, ipfsPrefix) {
		return cid.Undef, fmt.Errorf("path does not have /ipfs/ prefix")
	}

	// Everything up to the next "/" (or the end of the string) is the root segment.
	rootSegment, _, _ := strings.Cut(fullPath[len(ipfsPrefix):], "/")

	decoded, err := cid.Decode(rootSegment)
	if err != nil {
		return cid.Undef, err
	}
	return decoded, nil
}

// getCarEtag builds the weak ETag (W/"...") for a CAR response.
//
// The value has the form W/"<rootCid>.car.<hash>", where <hash> is the base-32
// xxhash of the immutable path combined with the request parameters that
// affect the response: the DAG scope (when it is not dagScopeAll) and the byte
// range (when it is not the implicit "from 0, no end" range).
//
// Each parameter is appended as a NUL-prefixed, labelled field. A bare "."
// separator is ambiguous because path text may itself contain ".": for example
// path ".../a" with a non-default scope "x" and path ".../a.x" with the default
// scope would hash the same input and share an ETag under the same root CID,
// which could produce an incorrect 304 Not Modified.
//
// r is currently unused; it is kept so the signature stays stable for callers.
func getCarEtag(r *http.Request, imPath ImmutablePath, params CarParams, rootCid cid.Cid) string {
	data := imPath.String()

	// The default scope is omitted, so an explicit dagScopeAll yields the same ETag.
	if params.Scope != dagScopeAll {
		data += "\x00scope=" + string(params.Scope)
	}

	// A nil range and a "from 0, no end" range are treated as equivalent here
	// and contribute nothing to the hash input.
	if rng := params.Range; rng != nil && (rng.From != 0 || rng.To != nil) {
		data += "\x00range=" + strconv.FormatInt(rng.From, 10)
		if rng.To != nil {
			data += ":" + strconv.FormatInt(*rng.To, 10)
		}
	}

	suffix := strconv.FormatUint(xxhash.Sum64([]byte(data)), 32)
	return `W/"` + rootCid.String() + ".car." + suffix + `"`
}
4 changes: 2 additions & 2 deletions gateway/handler_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt
responseContentType = cidContentType
}

// Set HTTP headers (for caching etc)
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
// Set HTTP headers (for caching, etc). Etag will be replaced if handled by serveCodecHTML.
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid(), responseContentType)
name := setCodecContentDisposition(w, r, resolvedPath, responseContentType)
w.Header().Set("Content-Type", responseContentType)
w.Header().Set("X-Content-Type-Options", "nosniff")
Expand Down
8 changes: 8 additions & 0 deletions gateway/handler_ipns_record.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ func (i *handler) serveIpnsRecord(ctx context.Context, w http.ResponseWriter, r
// TODO: use addCacheControlHeaders once #1818 is fixed.
recordEtag := strconv.FormatUint(xxhash.Sum64(rawRecord), 32)
w.Header().Set("Etag", recordEtag)

// Terminate early if Etag matches. We cannot rely on handleIfNoneMatch since
// we use the raw record to generate the etag value.
if etagMatch(r.Header.Get("If-None-Match"), recordEtag) {
w.WriteHeader(http.StatusNotModified)
return false
}

if record.Ttl != nil {
seconds := int(time.Duration(*record.Ttl).Seconds())
w.Header().Set("Cache-Control", fmt.Sprintf("public, max-age=%d", seconds))
Expand Down
16 changes: 1 addition & 15 deletions gateway/handler_tar.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,7 @@ func (i *handler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.R
rootCid := pathMetadata.LastSegment.Cid()

// Set Cache-Control and read optional Last-Modified time
modtime := addCacheControlHeaders(w, r, contentPath, rootCid)

// Weak Etag W/ because we can't guarantee byte-for-byte identical
// responses, but still want to benefit from HTTP Caching. Two TAR
// responses for the same CID will be logically equivalent,
// but when TAR is streamed, then in theory, files and directories
// may arrive in different order (depends on TAR lib and filesystem/inodes).
etag := `W/` + getEtag(r, rootCid)
w.Header().Set("Etag", etag)

// Finish early if Etag match
if r.Header.Get("If-None-Match") == etag {
w.WriteHeader(http.StatusNotModified)
return false
}
modtime := addCacheControlHeaders(w, r, contentPath, rootCid, "application/x-tar")

// Set Content-Disposition
var name string
Expand Down
2 changes: 1 addition & 1 deletion gateway/handler_unixfs__redirects.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ func (i *handler) serve4xx(w http.ResponseWriter, r *http.Request, content4xxPat
log.Debugf("using _redirects: custom %d file at %q", status, content4xxPath)
w.Header().Set("Content-Type", "text/html")
w.Header().Set("Content-Length", strconv.FormatInt(size, 10))
addCacheControlHeaders(w, r, content4xxPath, content4xxCid)
addCacheControlHeaders(w, r, content4xxPath, content4xxCid, "")
w.WriteHeader(status)
_, err = io.CopyN(w, content4xxFile, size)
return err
Expand Down
2 changes: 1 addition & 1 deletion gateway/handler_unixfs_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http.
defer span.End()

// Set Cache-Control and read optional Last-Modified time
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid(), "")

// Set Content-Disposition
name := addContentDispositionHeader(w, r, contentPath)
Expand Down

0 comments on commit 7647b01

Please sign in to comment.