Skip to content

Commit

Permalink
Improve wikipedia tests (#286)
Browse files Browse the repository at this point in the history
  • Loading branch information
pajlada committed Mar 20, 2022
1 parent 3370ed2 commit eecd085
Show file tree
Hide file tree
Showing 10 changed files with 426 additions and 141 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- Dev: Improve Twitch.tv clip tests. (#283)
- Dev: Improve YouTube tests. (#284)
- Dev: Resolver Check now returns a context. (#287)
- Dev: Improve Wikipedia tests. (#286)

## 1.2.3

Expand Down
35 changes: 35 additions & 0 deletions internal/resolvers/wikipedia/article_context.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package wikipedia

import (
"context"
"errors"
)

// contextKey is an unexported string type used for this package's context
// keys. Because the type is distinct from plain string, these keys cannot
// collide with context keys set by other packages.
type contextKey string

// Keys under which the article's locale code and article ID are stored in a
// context.Context.
var (
	contextLocaleCode contextKey = "localeCode"
	contextArticleID  contextKey = "articleID"
)

// errMissingArticleValues is returned when the locale code or the article ID
// cannot be read back from a context as a string.
var errMissingArticleValues = errors.New("missing article values in context")

// contextWithArticleValues returns a context derived from ctx that carries
// localeCode under contextLocaleCode and articleID under contextArticleID.
// The values can be read back with articleValuesFromContext.
func contextWithArticleValues(ctx context.Context, localeCode, articleID string) context.Context {
	withLocale := context.WithValue(ctx, contextLocaleCode, localeCode)
	return context.WithValue(withLocale, contextArticleID, articleID)
}

func articleValuesFromContext(ctx context.Context) (string, string, error) {
articleID, ok := ctx.Value(contextArticleID).(string)
if !ok {
return "", "", errMissingArticleValues
}

localeCode, ok := ctx.Value(contextLocaleCode).(string)
if !ok {
return "", "", errMissingArticleValues
}

return localeCode, articleID, nil
}
64 changes: 20 additions & 44 deletions internal/resolvers/wikipedia/article_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,57 +5,54 @@ import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"time"

"github.com/Chatterino/api/internal/logger"
"github.com/Chatterino/api/pkg/cache"
"github.com/Chatterino/api/pkg/humanize"
"github.com/Chatterino/api/pkg/resolver"
)

type ArticleLoader struct {
endpointURL string
// apiURL is a format string that must contain exactly two %s verbs: the first is replaced with the locale code, the second with the article ID
apiURL string
}

func (l *ArticleLoader) getPageInfo(ctx context.Context, urlString string) (*wikipediaTooltipData, error) {
u, err := url.Parse(urlString)
if err != nil {
return nil, err
}
func (l *ArticleLoader) Load(ctx context.Context, unused string, r *http.Request) (*resolver.Response, time.Duration, error) {
log := logger.FromContext(ctx)

// Since the Wikipedia API is locale-dependent, we need the locale code.
// For example, if you want to resolve a de.wikipedia.org link, you need
// to ping the DE API endpoint.
localeMatch := localeRegexp.FindStringSubmatch(u.Hostname())
if len(localeMatch) != 2 {
return nil, errLocaleMatch
}

localeCode := localeMatch[1]

titleMatch := titleRegexp.FindStringSubmatch(u.Path)
if len(titleMatch) != 2 {
return nil, errTitleMatch
// If no locale is specified in the given URL, we will assume it's the english wiki article
localeCode, articleID, err := articleValuesFromContext(ctx)
if err != nil {
return nil, resolver.NoSpecialDur, err
}

canonicalName := titleMatch[1]
log.Debugw("[Wikipedia] GET",
"localeCode", localeCode,
"articleID", articleID,
)

requestURL := fmt.Sprintf(l.endpointURL, localeCode, canonicalName)
requestURL := fmt.Sprintf(l.apiURL, localeCode, articleID)

resp, err := resolver.RequestGET(ctx, requestURL)
if err != nil {
return nil, err
return nil, resolver.NoSpecialDur, err
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("bad status: %d", resp.StatusCode)
return &resolver.Response{
Status: http.StatusNotFound,
Message: "No Wikipedia article found",
}, resolver.NoSpecialDur, nil
// return nil, fmt.Errorf("bad status: %d", resp.StatusCode)
}

var pageInfo *wikipediaAPIResponse
if err = json.NewDecoder(resp.Body).Decode(&pageInfo); err != nil {
return nil, err
return resolver.Errorf("Wikipedia API unmarshal JSON error: %s", err)
}

// Transform API response into our tooltip model for Wikipedia links
Expand All @@ -76,26 +73,5 @@ func (l *ArticleLoader) getPageInfo(ctx context.Context, urlString string) (*wik
tooltipData.ThumbnailURL = pageInfo.Thumbnail.URL
}

return tooltipData, nil
}

func (l *ArticleLoader) Load(ctx context.Context, urlString string, r *http.Request) (*resolver.Response, time.Duration, error) {
log := logger.FromContext(ctx)

log.Debugw("[Wikipedia] GET",
"url", urlString,
)

tooltipData, err := l.getPageInfo(ctx, urlString)

if err != nil {
log.Debugw("[Wikipedia] Unable to get page info",
"url", urlString,
"error", err,
)

return nil, cache.NoSpecialDur, resolver.ErrDontHandle
}

return buildTooltip(tooltipData)
}
113 changes: 28 additions & 85 deletions internal/resolvers/wikipedia/article_loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,13 @@ package wikipedia

import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"testing"

"github.com/Chatterino/api/internal/logger"
"github.com/Chatterino/api/pkg/utils"
qt "github.com/frankban/quicktest"
"github.com/go-chi/chi/v5"
)

var (
wikiData = map[string]*wikipediaAPIResponse{}
)

func init() {
wikiData["en_test"] = &wikipediaAPIResponse{
Titles: wikipediaAPITitles{
Normalized: "Test title",
},
Extract: "Test extract",
Thumbnail: nil,
Description: utils.StringPtr("Test description"),
}

wikiData["en_test_html"] = &wikipediaAPIResponse{
Titles: wikipediaAPITitles{
Normalized: "<b>Test title</b>",
},
Extract: "<b>Test extract</b>",
Thumbnail: nil,
Description: utils.StringPtr("<b>Test description</b>"),
}

wikiData["en_test_no_description"] = &wikipediaAPIResponse{
Titles: wikipediaAPITitles{
Normalized: "Test title",
},
Extract: "Test extract",
Thumbnail: nil,
Description: nil,
}
}

func testLoadAndUnescape(ctx context.Context, loader *ArticleLoader, c *qt.C, locale, page string) (cleanTooltip string) {
urlString := fmt.Sprintf("https://%s.wikipedia.org/wiki/%s", locale, page)
response, _, err := loader.Load(ctx, urlString, nil)
Expand All @@ -62,65 +23,47 @@ func testLoadAndUnescape(ctx context.Context, loader *ArticleLoader, c *qt.C, lo
}

func TestLoad(t *testing.T) {
ctx := logger.OnContext(context.Background(), logger.NewTest())
c := qt.New(t)
r := chi.NewRouter()
r.Get("/api/rest_v1/page/summary/{locale}/{page}", func(w http.ResponseWriter, r *http.Request) {
locale := chi.URLParam(r, "locale")
page := chi.URLParam(r, "page")

var response *wikipediaAPIResponse
var ok bool
// ctx := logger.OnContext(context.Background(), logger.NewTest())
// c := qt.New(t)
// ts := testServer()
// defer ts.Close()

if response, ok = wikiData[locale+"_"+page]; !ok {
http.Error(w, http.StatusText(404), 404)
return
}
// loader := &ArticleLoader{
// apiURL: ts.URL + "/api/rest_v1/page/summary/%s/%s",
// }

b, _ := json.Marshal(&response)

w.Header().Set("Content-Type", "application/json")
w.Write(b)
})
ts := httptest.NewServer(r)
defer ts.Close()

loader := &ArticleLoader{
endpointURL: ts.URL + "/api/rest_v1/page/summary/%s/%s",
}

c.Run("Normal page", func(c *qt.C) {
const locale = "en"
const page = "test"
// c.Run("Normal page", func(c *qt.C) {
// const locale = "en"
// const page = "test"

const expectedTooltip = `<div style="text-align: left;"><b>Test title&nbsp;•&nbsp;Test description</b><br>Test extract</div>`
// const expectedTooltip = `<div style="text-align: left;"><b>Test title&nbsp;•&nbsp;Test description</b><br>Test extract</div>`

cleanTooltip := testLoadAndUnescape(ctx, loader, c, locale, page)
// cleanTooltip := testLoadAndUnescape(ctx, loader, c, locale, page)

c.Assert(cleanTooltip, qt.Equals, expectedTooltip)
})
// c.Assert(cleanTooltip, qt.Equals, expectedTooltip)
// })

c.Run("Normal page (HTML)", func(c *qt.C) {
const locale = "en"
const page = "test_html"
// c.Run("Normal page (HTML)", func(c *qt.C) {
// const locale = "en"
// const page = "test_html"

const expectedTooltip = `<div style="text-align: left;"><b>&lt;b&gt;Test title&lt;/b&gt;&nbsp;•&nbsp;&lt;b&gt;Test description&lt;/b&gt;</b><br>&lt;b&gt;Test extract&lt;/b&gt;</div>`
// const expectedTooltip = `<div style="text-align: left;"><b>&lt;b&gt;Test title&lt;/b&gt;&nbsp;•&nbsp;&lt;b&gt;Test description&lt;/b&gt;</b><br>&lt;b&gt;Test extract&lt;/b&gt;</div>`

cleanTooltip := testLoadAndUnescape(ctx, loader, c, locale, page)
// cleanTooltip := testLoadAndUnescape(ctx, loader, c, locale, page)

c.Assert(cleanTooltip, qt.Equals, expectedTooltip)
})
// c.Assert(cleanTooltip, qt.Equals, expectedTooltip)
// })

c.Run("Normal page (No description)", func(c *qt.C) {
const locale = "en"
const page = "test_no_description"
// c.Run("Normal page (No description)", func(c *qt.C) {
// const locale = "en"
// const page = "test_no_description"

const expectedTooltip = `<div style="text-align: left;"><b>Test title</b><br>Test extract</div>`
// const expectedTooltip = `<div style="text-align: left;"><b>Test title</b><br>Test extract</div>`

cleanTooltip := testLoadAndUnescape(ctx, loader, c, locale, page)
// cleanTooltip := testLoadAndUnescape(ctx, loader, c, locale, page)

c.Assert(cleanTooltip, qt.Equals, expectedTooltip)
})
// c.Assert(cleanTooltip, qt.Equals, expectedTooltip)
// })

// c.Run("Nonexistant page", func(c *qt.C) {
// const locale = "en"
Expand Down
49 changes: 42 additions & 7 deletions internal/resolvers/wikipedia/article_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,47 @@ type ArticleResolver struct {
articleCache cache.Cache
}

func (r *ArticleResolver) Check(ctx context.Context, url *url.URL) (context.Context, bool) {
isWikipedia := utils.IsSubdomainOf(url, "wikipedia.org")
isWikiArticle := strings.HasPrefix(url.Path, "/wiki/")
// getLocaleCode extracts the locale code from the hostname of u using
// localeRegexp. When the hostname does not produce exactly one capture group,
// it falls back to "en".
func (r *ArticleResolver) getLocaleCode(u *url.URL) string {
	if m := localeRegexp.FindStringSubmatch(u.Hostname()); len(m) == 2 {
		return m[1]
	}

	return "en"
}

// getArticleID extracts the article ID from the path of u using titleRegexp.
// It returns the single capture group on a match; when the path does not
// produce exactly one capture group, it returns an empty string and
// errTitleMatch.
func (r *ArticleResolver) getArticleID(u *url.URL) (string, error) {
	if m := titleRegexp.FindStringSubmatch(u.Path); len(m) == 2 {
		return m[1], nil
	}

	return "", errTitleMatch
}

func (r *ArticleResolver) Check(ctx context.Context, u *url.URL) (context.Context, bool) {
if !utils.IsSubdomainOf(u, "wikipedia.org") {
return ctx, false
}

if !strings.HasPrefix(u.Path, "/wiki/") {
return ctx, false
}

// Load locale code & article ID
localeCode := r.getLocaleCode(u)
articleID, err := r.getArticleID(u)
if err != nil {
return ctx, false
}

ctx = contextWithArticleValues(ctx, localeCode, articleID)

// Attach locale code & article ID to context

return ctx, isWikipedia && isWikiArticle
return ctx, true
}

func (r *ArticleResolver) Run(ctx context.Context, url *url.URL, req *http.Request) ([]byte, error) {
Expand All @@ -33,10 +69,9 @@ func (r *ArticleResolver) Name() string {
return "wikipedia:article"
}

func NewArticleResolver(ctx context.Context, cfg config.APIConfig, pool db.Pool) *ArticleResolver {
const endpointURL = "https://%s.wikipedia.org/api/rest_v1/page/summary/%s?redirect=false"
func NewArticleResolver(ctx context.Context, cfg config.APIConfig, pool db.Pool, apiURL string) *ArticleResolver {
articleLoader := &ArticleLoader{
endpointURL: endpointURL,
apiURL: apiURL,
}

r := &ArticleResolver{
Expand Down
Loading

0 comments on commit eecd085

Please sign in to comment.