Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gw): directory URL normalization #9123

Merged
merged 2 commits into from
Jul 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions core/corehttp/gateway_handler_unixfs_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,30 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
}
originalUrlPath := requestURI.Path

// Check if directory has index.html, if so, serveFile
idxPath := ipath.Join(contentPath, "index.html")
idx, err := i.api.Unixfs().Get(ctx, idxPath)
switch err.(type) {
case nil:
cpath := contentPath.String()
dirwithoutslash := cpath[len(cpath)-1] != '/'
// Ensure directory paths end with '/'
if originalUrlPath[len(originalUrlPath)-1] != '/' {
// don't redirect to trailing slash if it's go get
// https://github.com/ipfs/kubo/pull/3963
goget := r.URL.Query().Get("go-get") == "1"
if dirwithoutslash && !goget {
// See comment above where originalUrlPath is declared.
if !goget {
suffix := "/"
// preserve query parameters
if r.URL.RawQuery != "" {
// preserve query parameters
suffix = suffix + "?" + r.URL.RawQuery
}

// /ipfs/cid/foo?bar must be redirected to /ipfs/cid/foo/?bar
redirectURL := originalUrlPath + suffix
logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath)
http.Redirect(w, r, redirectURL, http.StatusFound)
logger.Debugw("directory location moved permanently", "status", http.StatusMovedPermanently)
http.Redirect(w, r, redirectURL, http.StatusMovedPermanently)
return
}
}

// Check if directory has index.html, if so, serveFile
idxPath := ipath.Join(contentPath, "index.html")
idx, err := i.api.Unixfs().Get(ctx, idxPath)
switch err.(type) {
case nil:
f, ok := idx.(files.File)
if !ok {
internalWebError(w, files.ErrNotReader)
Expand Down Expand Up @@ -163,7 +165,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// add the correct link depending on whether the path ends with a slash
default:
if strings.HasSuffix(backLink, "/") {
backLink += "./.."
backLink += ".."
} else {
backLink += "/.."
}
Expand Down
16 changes: 8 additions & 8 deletions core/corehttp/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}

// expect 302 redirect to same path, but with trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr := res.Header["Location"]
if len(hdr) < 1 {
Expand All @@ -403,9 +403,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}

// expect 302 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr = res.Header["Location"]
if len(hdr) < 1 {
Expand Down Expand Up @@ -492,7 +492,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/file.txt\">") {
Expand Down Expand Up @@ -566,7 +566,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>/<a href=\"//example.net/foo%3F%20%23%3C%27/bar\">bar</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/file.txt\">") {
Expand Down
8 changes: 8 additions & 0 deletions test/sharness/t0110-gateway.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,17 @@ test_expect_success "GET IPFS directory file output looks good" '

test_expect_success "GET IPFS directory with index.html returns redirect to add trailing slash" "
curl -sI -o response_without_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?query=to-remember\" &&
test_should_contain \"HTTP/1.1 301 Moved Permanently\" response_without_slash &&
test_should_contain \"Location: /ipfs/$HASH2/dirwithindex/?query=to-remember\" response_without_slash
"

# This enables go get to parse go-import meta tags from index.html files stored in IPFS
# https://github.com/ipfs/kubo/pull/3963
test_expect_success "GET IPFS directory with index.html and no trailing slash returns expected output when go-get is passed" "
curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?go-get=1\" &&
test_should_contain \"hello i am a webpage\" response_with_slash
"

test_expect_success "GET IPFS directory with index.html and trailing slash returns expected output" "
curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex/?query=to-remember\" &&
test_should_contain \"hello i am a webpage\" response_with_slash
Expand Down
2 changes: 1 addition & 1 deletion test/sharness/t0113-gateway-symlink.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ test_expect_success "Add the test directory" '
'

test_expect_success "Test the directory listing" '
curl "$GWAY_ADDR/ipfs/$HASH" > list_response &&
curl "$GWAY_ADDR/ipfs/$HASH/" > list_response &&
test_should_contain ">foo<" list_response &&
test_should_contain ">bar<" list_response
'
Expand Down
4 changes: 2 additions & 2 deletions test/sharness/t0114-gateway-subdomains.sh
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ test_expect_success "valid file and subdirectory paths in directory listing at {

test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.localhost/sub/dir" '
curl -s --resolve $DIR_HOSTNAME:127.0.0.1 "http://$DIR_HOSTNAME/ipfs/ipns/" > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'

Expand Down Expand Up @@ -441,7 +441,7 @@ test_expect_success "valid file and directory paths in directory listing at {cid

test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.example.com/sub/dir" '
curl -s -H "Host: $DIR_FQDN" http://127.0.0.1:$GWAY_PORT/ipfs/ipns/ > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'

Expand Down
32 changes: 25 additions & 7 deletions test/sharness/t0115-gateway-dir-listing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,14 @@ test_expect_success "path gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/ipfs/$DIR_CID/\">..</a>" list_response
'

test_expect_success "path gw: Etag should be present" '
test_expect_success "path gw: redirect dir listing to URL with trailing slash" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /ipfs/${DIR_CID}/%c4%85/%c4%99/" list_response
'

test_expect_success "path gw: Etag should be present" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
Expand Down Expand Up @@ -72,31 +78,37 @@ test_expect_success "path gw: hash column should be a CID link with filename par
DIR_HOSTNAME="${DIR_CID}.ipfs.localhost"
# note: we skip DNS lookup by running curl with --resolve $DIR_HOSTNAME:127.0.0.1

test_expect_success "path gw: backlink on root CID should be hidden" '
test_expect_success "subdomain gw: backlink on root CID should be hidden" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_not_contain "<a href=\"/\">..</a>" list_response
'

test_expect_success "path gw: Etag should be present" '
test_expect_success "subdomain gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'

test_expect_success "subdomain gw: Etag should be present" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'

test_expect_success "path gw: backlink on subdirectory should point at parent directory" '
test_expect_success "subdomain gw: backlink on subdirectory should point at parent directory" '
test_should_contain "<a href=\"/%C4%85/%C4%99/..\">..</a>" list_response
'

test_expect_success "subdomain gw: breadcrumbs should leverage path-based router mounted on the parent domain" '
test_should_contain "/ipfs/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID\">$DIR_CID</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85\">ą</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85/%C4%99\">ę</a>" list_response
'

test_expect_success "path gw: name column should be a link to content root mounted at subdomain origin" '
test_expect_success "subdomain gw: name column should be a link to content root mounted at subdomain origin" '
test_should_contain "<a href=\"/%C4%85/%C4%99/file-%C5%BA%C5%82.txt\">file-źł.txt</a>" list_response
'

test_expect_success "path gw: hash column should be a CID link to path router with filename param" '
test_expect_success "subdomain gw: hash column should be a CID link to path router with filename param" '
test_should_contain "<a class=\"ipfs-hash\" translate=\"no\" href=\"//localhost:$GWAY_PORT/ipfs/$FILE_CID?filename=file-%25C5%25BA%25C5%2582.txt\">" list_response
'

Expand All @@ -121,8 +133,14 @@ test_expect_success "dnslink gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/\">..</a>" list_response
'

test_expect_success "dnslink gw: Etag should be present" '
test_expect_success "dnslink gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'

test_expect_success "dnslink gw: Etag should be present" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
Expand Down