From 7d7faed585c4a4319b2aa93d68ce05d7958e6f26 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 19 Jul 2022 01:17:37 +0200 Subject: [PATCH] fix(gw): ensure dir URLs have trailing slash This fixes a regression around directory listing and index.html hosting. Seems that during one of recent refactors code changed and we no longer check for trailing slash in HTTP request path, but look at content path instead. This cleans this up and also ensures dir behavior is the same for both index.html hosting and dir-index-html (generated listing). It also adds more tests so we catch any future regressions. --- core/corehttp/gateway_handler_unixfs_dir.go | 30 ++++++++++--------- core/corehttp/gateway_test.go | 16 +++++------ test/sharness/t0110-gateway.sh | 1 + test/sharness/t0113-gateway-symlink.sh | 2 +- test/sharness/t0114-gateway-subdomains.sh | 4 +-- test/sharness/t0115-gateway-dir-listing.sh | 32 ++++++++++++++++----- 6 files changed, 53 insertions(+), 32 deletions(-) diff --git a/core/corehttp/gateway_handler_unixfs_dir.go b/core/corehttp/gateway_handler_unixfs_dir.go index 4b8b7bc1f4de..4580159d1d6c 100644 --- a/core/corehttp/gateway_handler_unixfs_dir.go +++ b/core/corehttp/gateway_handler_unixfs_dir.go @@ -41,28 +41,30 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit } originalUrlPath := requestURI.Path - // Check if directory has index.html, if so, serveFile - idxPath := ipath.Join(resolvedPath, "index.html") - idx, err := i.api.Unixfs().Get(ctx, idxPath) - switch err.(type) { - case nil: - cpath := contentPath.String() - dirwithoutslash := cpath[len(cpath)-1] != '/' + // Ensure directory paths end with '/' + if originalUrlPath[len(originalUrlPath)-1] != '/' { + // don't redirect to trailing slash if it's go get + // https://github.com/ipfs/kubo/pull/3963 goget := r.URL.Query().Get("go-get") == "1" - if dirwithoutslash && !goget { - // See comment above where originalUrlPath is declared. + if !goget { suffix := "/" + // preserve query parameters if r.URL.RawQuery != "" { - // preserve query parameters suffix = suffix + "?" + r.URL.RawQuery } - + // /ipfs/cid/foo?bar must be redirected to /ipfs/cid/foo/?bar redirectURL := originalUrlPath + suffix - logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath) - http.Redirect(w, r, redirectURL, http.StatusFound) + logger.Debugw("directory location moved permanently", "status", http.StatusMovedPermanently) + http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) return } + } + // Check if directory has index.html, if so, serveFile + idxPath := ipath.Join(resolvedPath, "index.html") + idx, err := i.api.Unixfs().Get(ctx, idxPath) + switch err.(type) { + case nil: f, ok := idx.(files.File) if !ok { internalWebError(w, files.ErrNotReader) @@ -163,7 +165,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit // add the correct link depending on whether the path ends with a slash default: if strings.HasSuffix(backLink, "/") { - backLink += "./.." + backLink += ".." } else { backLink += "/.." } diff --git a/core/corehttp/gateway_test.go b/core/corehttp/gateway_test.go index 5ac27341d5d9..b6b504907aaa 100644 --- a/core/corehttp/gateway_test.go +++ b/core/corehttp/gateway_test.go @@ -380,9 +380,9 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } - // expect 302 redirect to same path, but with trailing slash - if res.StatusCode != 302 { - t.Errorf("status is %d, expected 302", res.StatusCode) + // expect 301 redirect to same path, but with trailing slash + if res.StatusCode != 301 { + t.Errorf("status is %d, expected 301", res.StatusCode) } hdr := res.Header["Location"] if len(hdr) < 1 { @@ -403,9 +403,9 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } - // expect 302 redirect to same path, but with prefix and trailing slash - if res.StatusCode != 302 { - t.Errorf("status is %d, expected 302", res.StatusCode) + // expect 301 redirect to same path, but with prefix and trailing slash + if res.StatusCode != 301 { + t.Errorf("status is %d, expected 301", res.StatusCode) } hdr = res.Header["Location"] if len(hdr) < 1 { @@ -492,7 +492,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) { if !matchPathOrBreadcrumbs(s, "/ipns/example.net/foo? #<'") { t.Fatalf("expected a path in directory listing") } - if !strings.Contains(s, "") { + if !strings.Contains(s, "") { t.Fatalf("expected backlink in directory listing") } if !strings.Contains(s, "") { @@ -566,7 +566,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) { if !matchPathOrBreadcrumbs(s, "/ipns/example.net/foo? #<'/bar") { t.Fatalf("expected a path in directory listing") } - if !strings.Contains(s, "") { + if !strings.Contains(s, "") { t.Fatalf("expected backlink in directory listing") } if !strings.Contains(s, "") { diff --git a/test/sharness/t0110-gateway.sh b/test/sharness/t0110-gateway.sh index 630e84648d6d..8cf28dbf41a0 100755 --- a/test/sharness/t0110-gateway.sh +++ b/test/sharness/t0110-gateway.sh @@ -72,6 +72,7 @@ test_expect_success "GET IPFS directory file output looks good" ' test_expect_success "GET IPFS directory with index.html returns redirect to add trailing slash" " curl -sI -o response_without_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?query=to-remember\" && + test_should_contain \"HTTP/1.1 301 Moved Permanently\" response_without_slash && test_should_contain \"Location: /ipfs/$HASH2/dirwithindex/?query=to-remember\" response_without_slash " diff --git a/test/sharness/t0113-gateway-symlink.sh b/test/sharness/t0113-gateway-symlink.sh index d1f94bde7e98..9fa7ffa6edf5 100755 --- a/test/sharness/t0113-gateway-symlink.sh +++ b/test/sharness/t0113-gateway-symlink.sh @@ -22,7 +22,7 @@ test_expect_success "Add the test directory" ' ' test_expect_success "Test the directory listing" ' - curl "$GWAY_ADDR/ipfs/$HASH" > list_response && + curl "$GWAY_ADDR/ipfs/$HASH/" > list_response && test_should_contain ">foo<" list_response && test_should_contain ">bar<" list_response ' diff --git a/test/sharness/t0114-gateway-subdomains.sh b/test/sharness/t0114-gateway-subdomains.sh index f9ab0d82427c..0dad1e95c1a3 100755 --- a/test/sharness/t0114-gateway-subdomains.sh +++ b/test/sharness/t0114-gateway-subdomains.sh @@ -268,7 +268,7 @@ test_expect_success "valid file and subdirectory paths in directory listing at { test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.localhost/sub/dir" ' curl -s --resolve $DIR_HOSTNAME:127.0.0.1 "http://$DIR_HOSTNAME/ipfs/ipns/" > list_response && - test_should_contain ".." list_response && + test_should_contain ".." list_response && test_should_contain "bar" list_response ' @@ -441,7 +441,7 @@ test_expect_success "valid file and directory paths in directory listing at {cid test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.example.com/sub/dir" ' curl -s -H "Host: $DIR_FQDN" http://127.0.0.1:$GWAY_PORT/ipfs/ipns/ > list_response && - test_should_contain ".." list_response && + test_should_contain ".." list_response && test_should_contain "bar" list_response ' diff --git a/test/sharness/t0115-gateway-dir-listing.sh b/test/sharness/t0115-gateway-dir-listing.sh index bd634388df50..bb84203bc020 100755 --- a/test/sharness/t0115-gateway-dir-listing.sh +++ b/test/sharness/t0115-gateway-dir-listing.sh @@ -43,8 +43,14 @@ test_expect_success "path gw: backlink on root CID should be hidden" ' test_should_not_contain ".." list_response ' -test_expect_success "path gw: Etag should be present" ' +test_expect_success "path gw: redirect dir listing to URL with trailing slash" ' curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę > list_response && + test_should_contain "HTTP/1.1 301 Moved Permanently" list_response && + test_should_contain "Location: /ipfs/${DIR_CID}/%c4%85/%c4%99/" list_response +' + +test_expect_success "path gw: Etag should be present" ' + curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ > list_response && test_should_contain "Index of" list_response && test_should_contain "Etag: \"DirIndex-" list_response ' @@ -72,19 +78,25 @@ test_expect_success "path gw: hash column should be a CID link with filename par DIR_HOSTNAME="${DIR_CID}.ipfs.localhost" # note: we skip DNS lookup by running curl with --resolve $DIR_HOSTNAME:127.0.0.1 -test_expect_success "path gw: backlink on root CID should be hidden" ' +test_expect_success "subdomain gw: backlink on root CID should be hidden" ' curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ > list_response && test_should_contain "Index of" list_response && test_should_not_contain ".." list_response ' -test_expect_success "path gw: Etag should be present" ' +test_expect_success "subdomain gw: redirect dir listing to URL with trailing slash" ' curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę > list_response && + test_should_contain "HTTP/1.1 301 Moved Permanently" list_response && + test_should_contain "Location: /%c4%85/%c4%99/" list_response +' + +test_expect_success "subdomain gw: Etag should be present" ' + curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response && test_should_contain "Index of" list_response && test_should_contain "Etag: \"DirIndex-" list_response ' -test_expect_success "path gw: backlink on subdirectory should point at parent directory" ' +test_expect_success "subdomain gw: backlink on subdirectory should point at parent directory" ' test_should_contain ".." list_response ' @@ -92,11 +104,11 @@ test_expect_success "subdomain gw: breadcrumbs should leverage path-based router test_should_contain "/ipfs/$DIR_CID/ą/ę" list_response ' -test_expect_success "path gw: name column should be a link to content root mounted at subdomain origin" ' +test_expect_success "subdomain gw: name column should be a link to content root mounted at subdomain origin" ' test_should_contain "file-źł.txt" list_response ' -test_expect_success "path gw: hash column should be a CID link to path router with filename param" ' +test_expect_success "subdomain gw: hash column should be a CID link to path router with filename param" ' test_should_contain "" list_response ' @@ -121,8 +133,14 @@ test_expect_success "dnslink gw: backlink on root CID should be hidden" ' test_should_not_contain ".." list_response ' -test_expect_success "dnslink gw: Etag should be present" ' +test_expect_success "dnslink gw: redirect dir listing to URL with trailing slash" ' curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę > list_response && + test_should_contain "HTTP/1.1 301 Moved Permanently" list_response && + test_should_contain "Location: /%c4%85/%c4%99/" list_response +' + +test_expect_success "dnslink gw: Etag should be present" ' + curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response && test_should_contain "Index of" list_response && test_should_contain "Etag: \"DirIndex-" list_response '