Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

frontend: Use Cloud search for Go import badges #30350

Merged
merged 7 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 64 additions & 93 deletions cmd/frontend/backend/go_importers.go
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
package backend

import (
"bytes"
"context"
"encoding/json"
"io"
"net/http"
"path"
"path/filepath"
"sort"
"net/url"
"regexp"
"strconv"
"strings"
"time"

"github.com/cockroachdb/errors"

"github.com/sourcegraph/sourcegraph/cmd/frontend/envvar"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/authz"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/api/internalapi"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
"github.com/sourcegraph/sourcegraph/internal/rcache"
"github.com/sourcegraph/sourcegraph/internal/vcs/git"
)

var MockCountGoImporters func(ctx context.Context, repo api.RepoName) (int, error)
Expand All @@ -32,9 +29,7 @@ var (

// CountGoImporters returns the number of Go importers for the repository's Go subpackages. This is
// a special case used only on Sourcegraph.com for repository badges.
//
// TODO: The import path is not always the same as the repository name.
func CountGoImporters(ctx context.Context, db database.DB, repo api.RepoName) (count int, err error) {
func CountGoImporters(ctx context.Context, cli httpcli.Doer, repo api.RepoName) (count int, err error) {
if MockCountGoImporters != nil {
return MockCountGoImporters(ctx, repo)
}
Expand Down Expand Up @@ -62,109 +57,85 @@ func CountGoImporters(ctx context.Context, db database.DB, repo api.RepoName) (c
}
}()

ctx, cancel := context.WithTimeout(ctx, 20*time.Second) // avoid tying up resources unduly
defer cancel()

// Find all (possible) Go packages in the repository.
goPackages, err := listGoPackagesInRepoImprecise(ctx, db, repo)
if err != nil {
return 0, err
}
const maxSubpackages = 50 // arbitrary limit to avoid overloading api.godoc.org
if len(goPackages) > maxSubpackages {
goPackages = goPackages[:maxSubpackages]
var q struct {
Query string
Variables map[string]interface{}
}

// Count importers for each of the repository's Go packages.
//
// TODO: The count sums together the user counts of all of the repository's subpackages. This
// overcounts the number of users, because if another project uses multiple subpackages in this
// repository, it is counted multiple times. This limitation is now documented and will be
// addressed in the future. See https://github.com/sourcegraph/sourcegraph/issues/2663.
for _, pkg := range goPackages {
// Assumes the import path is the same as the repo name - not always true!
req, err := http.NewRequest("GET", "https://api.godoc.org/importers/"+pkg, nil)
if err != nil {
return 0, err
}

response, err := countGoImportersHTTPClient.Do(req.WithContext(ctx))
if err != nil {
return 0, err
}
defer response.Body.Close()

var result struct {
Results []struct {
Path string
}
}
bytes, err := io.ReadAll(response.Body)
if err != nil {
return 0, err
}
err = json.Unmarshal(bytes, &result)
if err != nil {
return 0, err
}
count += len(result.Results)
q.Query = countGoImportersGraphQLQuery
q.Variables = map[string]interface{}{
"query": countGoImportersSearchQuery(repo),
}

return count, nil
}
body, err := json.Marshal(q)
if err != nil {
return 0, err
}

// listGoPackagesInRepoImprecise returns a list of import paths for all (probable) Go packages in
// the repository. It computes the list based solely on the repository name (as a prefix) and
// filenames in the repository; it does not parse or build the Go files to determine the list
// precisely.
func listGoPackagesInRepoImprecise(ctx context.Context, db database.DB, repoName api.RepoName) ([]string, error) {
if !envvar.SourcegraphDotComMode() {
// 🚨 SECURITY: Avoid leaking information about private repositories that the viewer is not
// allowed to access.
return nil, errors.New(
"listGoPackagesInRepoImprecise is only supported on Sourcegraph.com for public repositories",
)
rawurl, err := gqlURL("CountGoImporters")
if err != nil {
return 0, err
}

repo, err := NewRepos(db.Repos()).GetByName(ctx, repoName)
req, err := http.NewRequestWithContext(ctx, "POST", rawurl, bytes.NewReader(body))
if err != nil {
return nil, err
return 0, err
}

commitID, err := git.ResolveRevision(ctx, repo.Name, "HEAD", git.ResolveRevisionOptions{})
resp, err := cli.Do(req)
if err != nil {
return nil, err
return 0, err
}
fis, err := git.ReadDir(ctx, authz.DefaultSubRepoPermsChecker, repo.Name, commitID, "", true)
defer resp.Body.Close()

respBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
return 0, errors.Wrap(err, "ReadBody")
}

subpaths := map[string]struct{}{} // all non-vendor/internal/hidden dirs containing *.go files
for _, fi := range fis {
if name := fi.Name(); filepath.Ext(name) == ".go" {
dir := path.Dir(name)
if isPossibleExternallyImportableGoPackageDir(dir) {
subpaths[dir] = struct{}{}
}
var v struct {
Data struct {
Search struct{ Results struct{ MatchCount int } }
}
Errors []interface{}
}

if err := json.Unmarshal(respBody, &v); err != nil {
return 0, errors.Wrap(err, "Decode")
}

importPaths := make([]string, 0, len(subpaths))
for subpath := range subpaths {
importPaths = append(importPaths, path.Join(string(repo.Name), subpath))
if len(v.Errors) > 0 {
return 0, errors.Errorf("graphql: errors: %v", v.Errors)
}
sort.Strings(importPaths)
return importPaths, nil

return v.Data.Search.Results.MatchCount, nil
}

func isPossibleExternallyImportableGoPackageDir(dirPath string) bool {
components := strings.Split(dirPath, "/")
for _, c := range components {
if (strings.HasPrefix(c, ".") && len(c) > 1) || strings.HasPrefix(c, "_") || c == "vendor" || c == "internal" ||
c == "testdata" {
return false
}
// gqlURL returns the frontend's internal GraphQL API URL, with the given ?queryName parameter
// which is used to keep track of the source and type of GraphQL queries.
func gqlURL(queryName string) (string, error) {
u, err := url.Parse(internalapi.Client.URL)
if err != nil {
return "", err
}
return true
u.Path = "/.internal/graphql"
u.RawQuery = queryName
return u.String(), nil
}

// countGoImportersSearchQuery builds the search query string that CountGoImporters passes as the
// $query variable of its GraphQL request. The query is a space-separated list of filters that
// restricts results to file matches in paths matching go\.mod, uses regexp pattern matching,
// requests all results (count:all), limits the search to public repositories and sets a 20s
// search timeout.
//
// The repository name is escaped with regexp.QuoteMeta so that characters such as "." in it are
// matched literally rather than as regexp metacharacters.
func countGoImportersSearchQuery(repo api.RepoName) string {
return strings.Join([]string{
`type:file`,
`f:go\.mod`,
`patterntype:regexp`,
// Matches a line that starts with whitespace, then the repo name optionally followed by more
// non-space characters, then whitespace and a token starting with "v".
// NOTE(review): presumably this targets indented entries of a go.mod require block (module
// path followed by a version) — confirm against the go.mod files being searched.
`content:^\s+` + regexp.QuoteMeta(string(repo)) + `\S*\s+v\S`,
tsenart marked this conversation as resolved.
Show resolved Hide resolved
`count:all`,
`visibility:public`,
`timeout:20s`,
}, " ")
}

// countGoImportersGraphQLQuery is the GraphQL document that CountGoImporters sends as the
// "Query" field of its request body. It runs a search for the $query string variable and selects
// only results.matchCount, which CountGoImporters returns as the importer count.
const countGoImportersGraphQLQuery = `
query CountGoImporters($query: String!) {
search(query: $query) { results { matchCount } }
}`
107 changes: 0 additions & 107 deletions cmd/frontend/backend/go_importers_test.go

This file was deleted.

3 changes: 2 additions & 1 deletion cmd/frontend/internal/app/badge.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/backend"
"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/routevar"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
)

// TODO(slimsag): once https://github.com/badges/shields/pull/828 is merged,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we might be able to drop this TODO as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, that hasn't been done. I'm not sure what the exact steps would be here, so I'll let @slimsag comment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point of this TODO is that once shields.io has official badges for Sourcegraph, we no longer need our own endpoint that serves SVGs (and the serveRepoBadge below could just redirect to shields.io directly).

Looks like that PR was merged, so I believe we could do this today if we wanted to. If you search for Sourcegraph on https://shields.io/ you'll see it's there:

image

And if you click the URL /sourcegraph/rrc it pops up the builder UI:

image

But it seems that the badge design doesn't include the Sourcegraph logo. I guess that when we added that customization on our side, we never sent a PR to shields.io to add the logo there.

So yeah, I don't know. Would it be nice to fix? Maybe. But it's also maybe not worth the time.

Expand All @@ -20,7 +21,7 @@ import (

// NOTE: Keep in sync with services/backend/httpapi/repo_shield.go
func badgeValue(r *http.Request, db database.DB) (int, error) {
totalRefs, err := backend.CountGoImporters(r.Context(), db, routevar.ToRepo(mux.Vars(r)))
totalRefs, err := backend.CountGoImporters(r.Context(), httpcli.InternalDoer, routevar.ToRepo(mux.Vars(r)))
if err != nil {
return 0, errors.Wrap(err, "Defs.TotalRefs")
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/frontend/internal/httpapi/repo_shield.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/backend"
"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/routevar"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
)

// NOTE: Keep in sync with services/backend/httpapi/repo_shield.go
func badgeValue(r *http.Request, db database.DB) (int, error) {
totalRefs, err := backend.CountGoImporters(r.Context(), db, routevar.ToRepo(mux.Vars(r)))
totalRefs, err := backend.CountGoImporters(r.Context(), httpcli.InternalDoer, routevar.ToRepo(mux.Vars(r)))
if err != nil {
return 0, errors.Wrap(err, "Defs.TotalRefs")
}
Expand Down