Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
frontend: Use Cloud search for Go import badges (#30350)
Browse files Browse the repository at this point in the history
This commit fixes our Go import badges on sourcegraph.com, which have
been broken since api.godoc.org was brought down. There's a [tracking
issue](golang/go#36785) to re-introduce an
API for pkg.go.dev, but it's not implemented yet.

Instead, we implement a best-effort alternative that searches `go.mod`
files on sourcegraph.com for the given repo name.
A package's canonical import path sometimes differs from its repository
name; we don't handle that case.

There are [many open-source projects that have a broken badge](https://sourcegraph.com/search?q=context:global+sourcegraph.com+-/badge.svg&patternType=regexp).
After this lands, they will recover.

Co-authored-by: Erik Seliger <erikseliger@me.com>
Co-authored-by: Keegan Carruthers-Smith <keegan@sourcegraph.com>
  • Loading branch information
3 people authored Jan 28, 2022
1 parent c3e5464 commit 52fead1
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 204 deletions.
168 changes: 73 additions & 95 deletions cmd/frontend/backend/go_importers.go
Original file line number Diff line number Diff line change
@@ -1,40 +1,34 @@
package backend

import (
"bytes"
"context"
"encoding/json"
"io"
"net/http"
"path"
"path/filepath"
"sort"
"net/url"
"regexp"
"strconv"
"strings"
"time"

"github.com/cockroachdb/errors"

"github.com/sourcegraph/sourcegraph/cmd/frontend/envvar"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/authz"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/api/internalapi"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
"github.com/sourcegraph/sourcegraph/internal/rcache"
"github.com/sourcegraph/sourcegraph/internal/vcs/git"
)

var MockCountGoImporters func(ctx context.Context, repo api.RepoName) (int, error)

var (
countGoImportersHTTPClient = httpcli.ExternalDoer
goImportersCountCache = rcache.NewWithTTL("go-importers-count", 14400) // 4 hours
goImportersCountCache = rcache.NewWithTTL("go-importers-count", 14400) // 4 hours
)

// CountGoImporters returns the number of Go importers for the repository's Go subpackages. This is
// a special case used only on Sourcegraph.com for repository badges.
//
// TODO: The import path is not always the same as the repository name.
func CountGoImporters(ctx context.Context, db database.DB, repo api.RepoName) (count int, err error) {
func CountGoImporters(ctx context.Context, cli httpcli.Doer, repo api.RepoName) (count int, err error) {
if MockCountGoImporters != nil {
return MockCountGoImporters(ctx, repo)
}
Expand Down Expand Up @@ -62,109 +56,93 @@ func CountGoImporters(ctx context.Context, db database.DB, repo api.RepoName) (c
}
}()

ctx, cancel := context.WithTimeout(ctx, 20*time.Second) // avoid tying up resources unduly
defer cancel()

// Find all (possible) Go packages in the repository.
goPackages, err := listGoPackagesInRepoImprecise(ctx, db, repo)
if err != nil {
return 0, err
}
const maxSubpackages = 50 // arbitrary limit to avoid overloading api.godoc.org
if len(goPackages) > maxSubpackages {
goPackages = goPackages[:maxSubpackages]
var q struct {
Query string
Variables map[string]interface{}
}

// Count importers for each of the repository's Go packages.
//
// TODO: The count sums together the user counts of all of the repository's subpackages. This
// overcounts the number of users, because if another project uses multiple subpackages in this
// repository, it is counted multiple times. This limitation is now documented and will be
// addressed in the future. See https://github.com/sourcegraph/sourcegraph/issues/2663.
for _, pkg := range goPackages {
// Assumes the import path is the same as the repo name - not always true!
req, err := http.NewRequest("GET", "https://api.godoc.org/importers/"+pkg, nil)
if err != nil {
return 0, err
}

response, err := countGoImportersHTTPClient.Do(req.WithContext(ctx))
if err != nil {
return 0, err
}
defer response.Body.Close()

var result struct {
Results []struct {
Path string
}
}
bytes, err := io.ReadAll(response.Body)
if err != nil {
return 0, err
}
err = json.Unmarshal(bytes, &result)
if err != nil {
return 0, err
}
count += len(result.Results)
q.Query = countGoImportersGraphQLQuery
q.Variables = map[string]interface{}{
"query": countGoImportersSearchQuery(repo),
}

return count, nil
}
body, err := json.Marshal(q)
if err != nil {
return 0, err
}

// listGoPackagesInRepoImprecise returns a list of import paths for all (probable) Go packages in
// the repository. It computes the list based solely on the repository name (as a prefix) and
// filenames in the repository; it does not parse or build the Go files to determine the list
// precisely.
func listGoPackagesInRepoImprecise(ctx context.Context, db database.DB, repoName api.RepoName) ([]string, error) {
if !envvar.SourcegraphDotComMode() {
// 🚨 SECURITY: Avoid leaking information about private repositories that the viewer is not
// allowed to access.
return nil, errors.New(
"listGoPackagesInRepoImprecise is only supported on Sourcegraph.com for public repositories",
)
rawurl, err := gqlURL("CountGoImporters")
if err != nil {
return 0, err
}

repo, err := NewRepos(db.Repos()).GetByName(ctx, repoName)
req, err := http.NewRequestWithContext(ctx, "POST", rawurl, bytes.NewReader(body))
if err != nil {
return nil, err
return 0, err
}

commitID, err := git.ResolveRevision(ctx, repo.Name, "HEAD", git.ResolveRevisionOptions{})
resp, err := cli.Do(req)
if err != nil {
return nil, err
return 0, err
}
fis, err := git.ReadDir(ctx, authz.DefaultSubRepoPermsChecker, repo.Name, commitID, "", true)
defer resp.Body.Close()

respBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
return 0, errors.Wrap(err, "ReadBody")
}

subpaths := map[string]struct{}{} // all non-vendor/internal/hidden dirs containing *.go files
for _, fi := range fis {
if name := fi.Name(); filepath.Ext(name) == ".go" {
dir := path.Dir(name)
if isPossibleExternallyImportableGoPackageDir(dir) {
subpaths[dir] = struct{}{}
}
var v struct {
Data struct {
Search struct{ Results struct{ MatchCount int } }
}
Errors []interface{}
}

if err := json.Unmarshal(respBody, &v); err != nil {
return 0, errors.Wrap(err, "Decode")
}

importPaths := make([]string, 0, len(subpaths))
for subpath := range subpaths {
importPaths = append(importPaths, path.Join(string(repo.Name), subpath))
if len(v.Errors) > 0 {
return 0, errors.Errorf("graphql: errors: %v", v.Errors)
}
sort.Strings(importPaths)
return importPaths, nil

return v.Data.Search.Results.MatchCount, nil
}

func isPossibleExternallyImportableGoPackageDir(dirPath string) bool {
components := strings.Split(dirPath, "/")
for _, c := range components {
if (strings.HasPrefix(c, ".") && len(c) > 1) || strings.HasPrefix(c, "_") || c == "vendor" || c == "internal" ||
c == "testdata" {
return false
}
// gqlURL returns the frontend's internal GraphQL API URL, with the given ?queryName parameter
// which is used to keep track of the source and type of GraphQL queries.
func gqlURL(queryName string) (string, error) {
u, err := url.Parse(internalapi.Client.URL)
if err != nil {
return "", err
}
return true
u.Path = "/.internal/graphql"
u.RawQuery = queryName
return u.String(), nil
}

// countGoImportersSearchQuery builds the search query that looks for go.mod files
// mentioning the given repository followed by a version identifier.
//
// The content filter is a regular expression made of these pieces:
//   - ^\s* to not match the repo inside replace blocks which have a $repo => $replacement $version format.
//   - the repository name, escaped with regexp.QuoteMeta so it is matched literally
//   - (/\S+)? to match sub-packages or packages at different versions (e.g. github.com/tsenart/vegeta/v12)
//   - \s+ to match spaces between repo name and version identifier
//   - v\d to match beginning of version identifier
//
// See: https://sourcegraph.com/search?q=context:global+type:file+f:%28%5E%7C/%29go%5C.mod%24+content:%5E%5Cs*github%5C.com/tsenart/vegeta%28/%5CS%2B%29%3F%5Cs%2Bv%5Cd+visibility:public+count:all&patternType=regexp
func countGoImportersSearchQuery(repo api.RepoName) string {
	escapedRepo := regexp.QuoteMeta(string(repo))
	contentFilter := `content:^\s*` + escapedRepo + `(/\S+)?\s+v\d`

	filters := [...]string{
		`type:file`,
		`f:(^|/)go\.mod$`,
		`patterntype:regexp`,
		contentFilter,
		`count:all`,
		`visibility:public`,
		`timeout:20s`,
	}

	// Emit the filters separated by single spaces, in the order listed above.
	var query strings.Builder
	for i, filter := range filters {
		if i > 0 {
			query.WriteByte(' ')
		}
		query.WriteString(filter)
	}
	return query.String()
}

// countGoImportersGraphQLQuery is the GraphQL document that CountGoImporters sends as the
// request's Query field. It runs the search given in the $query variable and selects only
// the results' matchCount.
const countGoImportersGraphQLQuery = `
query CountGoImporters($query: String!) {
search(query: $query) { results { matchCount } }
}`
107 changes: 0 additions & 107 deletions cmd/frontend/backend/go_importers_test.go

This file was deleted.

3 changes: 2 additions & 1 deletion cmd/frontend/internal/app/badge.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/backend"
"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/routevar"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
)

// TODO(slimsag): once https://github.com/badges/shields/pull/828 is merged,
Expand All @@ -20,7 +21,7 @@ import (

// NOTE: Keep in sync with services/backend/httpapi/repo_shield.go
func badgeValue(r *http.Request, db database.DB) (int, error) {
totalRefs, err := backend.CountGoImporters(r.Context(), db, routevar.ToRepo(mux.Vars(r)))
totalRefs, err := backend.CountGoImporters(r.Context(), httpcli.InternalDoer, routevar.ToRepo(mux.Vars(r)))
if err != nil {
return 0, errors.Wrap(err, "Defs.TotalRefs")
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/frontend/internal/httpapi/repo_shield.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/backend"
"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/routevar"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
)

// NOTE: Keep in sync with services/backend/httpapi/repo_shield.go
func badgeValue(r *http.Request, db database.DB) (int, error) {
totalRefs, err := backend.CountGoImporters(r.Context(), db, routevar.ToRepo(mux.Vars(r)))
totalRefs, err := backend.CountGoImporters(r.Context(), httpcli.InternalDoer, routevar.ToRepo(mux.Vars(r)))
if err != nil {
return 0, errors.Wrap(err, "Defs.TotalRefs")
}
Expand Down

0 comments on commit 52fead1

Please sign in to comment.