Skip to content

Commit

Permalink
feat: X-Ipfs-Roots for smarter HTTP caches (#8720)
Browse files Browse the repository at this point in the history
  • Loading branch information
lidel authored Mar 1, 2022
1 parent 10ff11b commit caba3b2
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 0 deletions.
54 changes: 54 additions & 0 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
w.Header().Set("X-IPFS-Path", urlPath)
w.Header().Set("Etag", responseEtag)

if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil {
w.Header().Set("X-Ipfs-Roots", rootCids)
} else { // this should never happen, as we resolved the urlPath already
webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError)
return
}

// set these headers _after_ the error, for we may just not have it
// and don't want the client to cache a 500 response...
// and only if it's /ipfs!
Expand Down Expand Up @@ -391,6 +398,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
internalWebError(w, files.ErrNotReader)
return
}
// static index.html → no need to generate dynamic dir-index-html
// replace mutable DirIndex Etag with immutable dir CID
w.Header().Set("Etag", `"`+resolvedPath.Cid().String()+`"`)

logger.Debugw("serving index.html file", "path", idxPath)
// write to request
Expand Down Expand Up @@ -785,6 +795,50 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) {
}
}

// buildIpfsRootsHeader produces the value of the X-Ipfs-Roots response header:
// a comma-separated, ordered list of CIDs, one per segment of contentPath.
//
// Each CID is a "logical root": the result of resolving the content path up
// to and including that segment, which is either a directory or the root
// block of a file. HTTP caches can use the list to make smarter invalidation
// decisions (e.g. keep a specific subdirectory or file if its CID did not
// change).
//
// A good example is Wikipedia, which is HAMT-sharded, but only the logical
// roots that match the human-readable path segments matter here. Given
//
//	contentPath = /ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey
//
// the list is generated by doing the equivalent of `ipfs resolve -r` on each
// sub path:
//
//	/ipns/en.wikipedia-on-ipfs.org → bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze
//	/ipns/en.wikipedia-on-ipfs.org/wiki/ → bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4
//	/ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey → bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma
//
// which yields:
//
//	X-Ipfs-Roots: bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze,bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4,bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma
//
// Note that while the first root changes every time any article changes, the
// last root (the specific article) may not change at all.
//
// The first resolution error aborts the walk and is returned unchanged.
// NOTE(review): contentPath is sliced at fixed offsets, so it is assumed to
// start with "/ipfs/" or "/ipns/" — confirm every caller guarantees this.
func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Request) (string, error) {
	// Everything after the 6-byte namespace prefix ("/ipfs/" or "/ipns/").
	segments := strings.Split(contentPath[6:], "/")

	var (
		subPath strings.Builder // grows by one segment per iteration
		cids    []string
	)
	subPath.WriteString(contentPath[:5]) // /ipfs or /ipns

	for _, segment := range segments {
		// Empty segments come from trailing or doubled slashes.
		if len(segment) == 0 {
			continue
		}
		subPath.WriteString("/" + segment)

		resolved, err := i.api.ResolvePath(r.Context(), ipath.New(subPath.String()))
		if err != nil {
			return "", err
		}
		cids = append(cids, resolved.Cid().String())
	}

	// Comma-separated list, following the convention from rfc2616#sec4.2.
	return strings.Join(cids, ","), nil
}

func webError(w http.ResponseWriter, message string, err error, defaultCode int) {
if _, ok := err.(resolver.ErrNoLink); ok {
webErrorWithCode(w, message, err, http.StatusNotFound)
Expand Down
150 changes: 150 additions & 0 deletions test/sharness/t0116-gateway-cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env bash
#
# Checks the caching-related response headers (X-Ipfs-Path, X-Ipfs-Roots and
# Etag) returned by the HTTP gateway for /ipfs/ and /ipns/ content paths.

test_description="Test HTTP Gateway Cache Control Support"

# Load the shared test helpers used throughout this file.
. lib/test-lib.sh

# Prepare a repo and start a daemon for the gateway requests below.
# NOTE(review): both helpers are presumably provided by lib/test-lib.sh, and
# the daemon is presumably offline (per the helper name) — confirm.
test_init_ipfs
test_launch_ipfs_daemon_without_network

# Cache control support is based on logical roots (each path segment == one logical root).
# To maximize the test surface, we want to test:
# - /ipfs/ content path
# - /ipns/ content path
# - at least 3 levels
# - separate tests for a directory listing and a file
# - have an implicit index.html for good measure
# /ipns/root1/root2/root3/ (/ipns/root1/root2/root3/index.html)

# Note: we cover these important edge cases here:
# ROOT3_CID - dir listing (dir-index-html response)
# ROOT4_CID - index.html returned as a root response (dir/), instead of generated dir-index-html
# FILE_CID - index.html returned directly, as a file

# Build the fixture tree root2/root3/root4/index.html, add it wrapped in a
# directory (ROOT1_CID), and record the CID of every logical root on the path
# down to the file.
# Every command is joined with && so that a failure at any step fails the test
# instead of being masked by the exit status of the last command.
test_expect_success "Add the test directory" '
  mkdir -p root2/root3/root4 &&
  echo "hello" > root2/root3/root4/index.html &&
  ROOT1_CID=$(ipfs add -Qrw --cid-version 1 root2) &&
  ROOT2_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2 | cut -d "/" -f3) &&
  ROOT3_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3 | cut -d "/" -f3) &&
  ROOT4_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3/root4 | cut -d "/" -f3) &&
  FILE_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3/root4/index.html | cut -d "/" -f3)
'

# Generate a dedicated key, publish /ipfs/$ROOT1_CID under it, and verify that
# the name resolves back to the same path, so the /ipns/ tests below exercise
# the same content as the /ipfs/ tests.
# The key generation is now part of the && chain: if it fails, the test fails
# right there rather than continuing with an empty TEST_IPNS_ID.
test_expect_success "Prepare IPNS unixfs content path for testing" '
  TEST_IPNS_ID=$(ipfs key gen --ipns-base=base36 --type=ed25519 cache_test_key | head -n1 | tr -d "\n") &&
  ipfs name publish --key cache_test_key --allow-offline -Q "/ipfs/$ROOT1_CID" > name_publish_out &&
  test_check_peerid "${TEST_IPNS_ID}" &&
  ipfs name resolve "${TEST_IPNS_ID}" > output &&
  printf "/ipfs/%s\n" "$ROOT1_CID" > expected &&
  test_cmp expected output
'

# Each request below discards the response body (stdout) and saves the verbose
# curl output (stderr) to a curl_*_output file. The header tests further down
# grep those files for response header lines, which start with "< ".
# The file is also printed with cat so it shows up in the test log.

# GET /ipfs/
test_expect_success "GET for /ipfs/ unixfs dir listing succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_ipfs_dir_listing_output &&
cat curl_ipfs_dir_listing_output
'
test_expect_success "GET for /ipfs/ unixfs dir with index.html succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/" >/dev/null 2>curl_ipfs_dir_index.html_output &&
cat curl_ipfs_dir_index.html_output
'
test_expect_success "GET for /ipfs/ unixfs file succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_ipfs_file_output &&
cat curl_ipfs_file_output
'
# GET /ipns/
# Same three requests, addressed through the IPNS name published earlier.
test_expect_success "GET for /ipns/ unixfs dir listing succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/" >/dev/null 2>curl_ipns_dir_listing_output &&
cat curl_ipns_dir_listing_output
'
test_expect_success "GET for /ipns/ unixfs dir with index.html succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/" >/dev/null 2>curl_ipns_dir_index.html_output &&
cat curl_ipns_dir_index.html_output
'
test_expect_success "GET for /ipns/ unixfs file succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" >/dev/null 2>curl_ipns_file_output &&
cat curl_ipns_file_output
'

# X-Ipfs-Path
# The header is expected to echo the requested content path: the /ipns/
# responses are checked for the /ipns/ path, not the /ipfs/ path it resolves to.
# grep matches substrings, so each check asserts that the header value starts
# with the expected path.

## dir generated listing
test_expect_success "GET /ipfs/ dir listing response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3" curl_ipfs_dir_listing_output
'
test_expect_success "GET /ipns/ dir listing response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3" curl_ipns_dir_listing_output
'

## dir static index.html
test_expect_success "GET /ipfs/ dir index.html response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3/root4/" curl_ipfs_dir_index.html_output
'
test_expect_success "GET /ipns/ dir index.html response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3/root4/" curl_ipns_dir_index.html_output
'

## file
test_expect_success "GET /ipfs/ file response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3/root4/index.html" curl_ipfs_file_output
'
test_expect_success "GET /ipns/ file response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" curl_ipns_file_output
'

# X-Ipfs-Roots
# Expected value: one CID per path segment, comma-separated, ordered from the
# top-level root down to the requested resource.
# The /ipns/ responses are expected to list the same CIDs as the /ipfs/ ones,
# because the IPNS name was published to /ipfs/$ROOT1_CID.

## dir generated listing
test_expect_success "GET /ipfs/ dir listing response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID}" curl_ipfs_dir_listing_output
'
test_expect_success "GET /ipns/ dir listing response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID}" curl_ipns_dir_listing_output
'

## dir static index.html
test_expect_success "GET /ipfs/ dir index.html response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID}" curl_ipfs_dir_index.html_output
'
test_expect_success "GET /ipns/ dir index.html response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID}" curl_ipns_dir_index.html_output
'

## file
test_expect_success "GET /ipfs/ file response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID},${FILE_CID}" curl_ipfs_file_output
'
test_expect_success "GET /ipns/ file response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID},${FILE_CID}" curl_ipns_file_output
'

# Etag
# Three response kinds are covered, each with its own expected Etag shape.

## dir generated listing
## The Etag is expected to look like "DirIndex-<anything>_CID-<dir CID>";
## the part matched by .+ is not checked.
test_expect_success "GET /ipfs/ dir response has special Etag for generated dir listing" '
grep -E "< Etag: \"DirIndex-.+_CID-${ROOT3_CID}\"" curl_ipfs_dir_listing_output
'
test_expect_success "GET /ipns/ dir response has special Etag for generated dir listing" '
grep -E "< Etag: \"DirIndex-.+_CID-${ROOT3_CID}\"" curl_ipns_dir_listing_output
'

## dir static index.html should use the CID of the directory (not the DirIndex form) for improved HTTP caching
test_expect_success "GET /ipfs/ dir index.html response has dir CID as Etag" '
grep "< Etag: \"${ROOT4_CID}\"" curl_ipfs_dir_index.html_output
'
test_expect_success "GET /ipns/ dir index.html response has dir CID as Etag" '
grep "< Etag: \"${ROOT4_CID}\"" curl_ipns_dir_index.html_output
'

## file
## A file requested directly is expected to use its own CID as the Etag.
test_expect_success "GET /ipfs/ response has CID as Etag for a file" '
grep "< Etag: \"${FILE_CID}\"" curl_ipfs_file_output
'
test_expect_success "GET /ipns/ response has CID as Etag for a file" '
grep "< Etag: \"${FILE_CID}\"" curl_ipns_file_output
'

# Stop the daemon launched at the top of this file.
test_kill_ipfs_daemon

# NOTE(review): presumably reports the test summary and exits; the helper is
# defined outside this file — confirm.
test_done
File renamed without changes.

0 comments on commit caba3b2

Please sign in to comment.