Skip to content

Commit

Permalink
feat: add tooltip support for PDF files (#374)
Browse files Browse the repository at this point in the history
Co-authored-by: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
  • Loading branch information
leon-richardt and pajlada committed Oct 16, 2022
1 parent dbc34af commit 67a1371
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Breaking: Go version 1.17 is now the minimum required version to build this. (#292)
- Breaking: Thumbnail generation now requires libvips. See [docs/build.md](./docs/build.md) for prerequisite instructions. (#366, #369)
- Breaking: Resolver caches are now stored in PostgreSQL. See [docs/build.md](./docs/build.md) for prerequisite instructions. (#271)
- PDF: Generate customized tooltips for PDF files. (#374)
- Twitter: Generate thumbnails with all images of a tweet. (#373)
- YouTube: Added support for 'YouTube shorts' URLs. (#299)
- Fix: SevenTV emotes now resolve correctly. (#281, #288, #307)
Expand Down
6 changes: 5 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ require (
github.com/nicklaw5/helix v1.25.0
github.com/pashagolub/pgxmock v1.8.0
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pdfcpu/pdfcpu v0.3.13
github.com/prometheus/client_golang v1.13.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.13.0
Expand All @@ -43,6 +44,8 @@ require (
github.com/googleapis/gax-go/v2 v2.5.1 // indirect
github.com/hashicorp/golang-lru v0.5.5-0.20200511160909-eb529947af53 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/hhrutter/lzw v0.0.0-20190829144645-6f07a24e8650 // indirect
github.com/hhrutter/tiff v0.0.0-20190829141212-736cae8d0bc7 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
github.com/jackc/pgio v1.0.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
Expand All @@ -58,6 +61,7 @@ require (
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pelletier/go-toml/v2 v2.0.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
Expand All @@ -71,7 +75,7 @@ require (
go.uber.org/multierr v1.6.0 // indirect
golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa // indirect
golang.org/x/exp/typeparams v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5 // indirect
golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb // indirect
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
golang.org/x/net v0.0.0-20221012135044-0b7e1fb9d458 // indirect
golang.org/x/oauth2 v0.0.0-20221006150949-b44042a4b9c1 // indirect
Expand Down
11 changes: 10 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ github.com/hashicorp/golang-lru v0.5.5-0.20200511160909-eb529947af53 h1:mcyf48Fj
github.com/hashicorp/golang-lru v0.5.5-0.20200511160909-eb529947af53/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hhrutter/lzw v0.0.0-20190827003112-58b82c5a41cc/go.mod h1:yJBvOcu1wLQ9q9XZmfiPfur+3dQJuIhYQsMGLYcItZk=
github.com/hhrutter/lzw v0.0.0-20190829144645-6f07a24e8650 h1:1yY/RQWNSBjJe2GDCIYoLmpWVidrooriUr4QS/zaATQ=
github.com/hhrutter/lzw v0.0.0-20190829144645-6f07a24e8650/go.mod h1:yJBvOcu1wLQ9q9XZmfiPfur+3dQJuIhYQsMGLYcItZk=
github.com/hhrutter/tiff v0.0.0-20190829141212-736cae8d0bc7 h1:o1wMw7uTNyA58IlEdDpxIrtFHTgnvYzA8sCQz8luv94=
github.com/hhrutter/tiff v0.0.0-20190829141212-736cae8d0bc7/go.mod h1:WkUxfS2JUu3qPo6tRld7ISb8HiC0gVSU91kooBMDVok=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
Expand Down Expand Up @@ -306,6 +311,8 @@ github.com/pashagolub/pgxmock v1.8.0 h1:05JB+jng7yPdeC6i04i8TC4H1Kr7TfcFeQyf4JP6
github.com/pashagolub/pgxmock v1.8.0/go.mod h1:kDkER7/KJdD3HQjNvFw5siwR7yREKmMvwf8VhAgTK5o=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pdfcpu/pdfcpu v0.3.13 h1:VFon2Yo1PJt+sA57vPAeXWGLSZ7Ux3Jl4h02M0+s3dg=
github.com/pdfcpu/pdfcpu v0.3.13/go.mod h1:UJc5xsXg0fpmjp1zOPdyYcAQArc/Zf3V0nv5URe+9fg=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml/v2 v2.0.5 h1:ipoSadvV8oGUjnUbMub59IDPPwfxF694nG/jwbMiyQg=
Expand Down Expand Up @@ -442,8 +449,10 @@ golang.org/x/exp/typeparams v0.0.0-20220722155223-a9213eeb770e h1:7Xs2YCOpMlNqSQ
golang.org/x/exp/typeparams v0.0.0-20220722155223-a9213eeb770e/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5 h1:QelT11PB4FXiDEXucrfNckHoFxwt8USGY1ajP1ZF5lM=
golang.org/x/image v0.0.0-20190823064033-3a9bac650e44/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb h1:fqpd0EBDzlHRCjiphRR5Zo/RSWWQlWv34418dnEixWk=
golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down
34 changes: 29 additions & 5 deletions internal/resolvers/default/link_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"strings"
"time"

"github.com/Chatterino/api/internal/logger"
"github.com/Chatterino/api/internal/staticresponse"
"github.com/Chatterino/api/pkg/cache"
"github.com/Chatterino/api/pkg/resolver"
Expand All @@ -21,9 +22,10 @@ import (
)

type LinkLoader struct {
baseURL string
customResolvers []resolver.Resolver
maxContentLength uint64
baseURL string
customResolvers []resolver.Resolver
contentTypeResolvers []ContentTypeResolver
maxContentLength uint64
}

func (l *LinkLoader) defaultTooltipData(doc *goquery.Document, r *http.Request, resp *http.Response) tooltipData {
Expand All @@ -39,6 +41,8 @@ func (l *LinkLoader) defaultTooltipData(doc *goquery.Document, r *http.Request,
}

func (l *LinkLoader) Load(ctx context.Context, urlString string, r *http.Request) ([]byte, *int, *string, time.Duration, error) {
log := logger.FromContext(ctx)

requestUrl, err := url.Parse(urlString)
if err != nil {
return resolver.ReturnInvalidURL()
Expand Down Expand Up @@ -91,8 +95,28 @@ func (l *LinkLoader) Load(ctx context.Context, urlString string, r *http.Request
return staticresponse.SNoLinkInfoFound.Return()
}

limiter := &resolver.WriteLimiter{Limit: l.maxContentLength}
contentType := resp.Header.Get("Content-Type")
for _, ctResolver := range l.contentTypeResolvers {
if ctResolver.Check(ctx, contentType) {
ttResponse, err := ctResolver.Run(ctx, r, resp)
if err != nil {
log.Errorw("error running ContentTypeResolver",
"resolver", ctResolver.Name(),
"err", err,
)

return utils.MarshalNoDur(&resolver.Response{
Status: http.StatusInternalServerError,
Message: "ContentTypeResolver error " + resolver.CleanResponse(err.Error()),
})
}

return utils.MarshalNoDur(ttResponse)
}
}

// Fallback to parsing via goquery
limiter := &resolver.WriteLimiter{Limit: l.maxContentLength}
doc, err := goquery.NewDocumentFromReader(io.TeeReader(resp.Body, limiter))
if err != nil {
return utils.MarshalNoDur(&resolver.Response{
Expand Down Expand Up @@ -124,7 +148,7 @@ func (l *LinkLoader) Load(ctx context.Context, urlString string, r *http.Request
Thumbnail: data.ImageSrc,
}

if thumbnail.IsSupportedThumbnailType(resp.Header.Get("content-type")) {
if thumbnail.IsSupportedThumbnailType(contentType) {
response.Thumbnail = utils.FormatThumbnailURL(l.baseURL, r, resp.Request.URL.String())
}

Expand Down
10 changes: 7 additions & 3 deletions internal/resolvers/default/link_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,14 @@ func New(ctx context.Context, cfg config.APIConfig, pool db.Pool, helixClient *h
youtube.Initialize(ctx, cfg, pool, &customResolvers)
seventv.Initialize(ctx, cfg, pool, &customResolvers)

contentTypeResolvers := []ContentTypeResolver{}
contentTypeResolvers = append(contentTypeResolvers, NewPDFResolver(cfg.BaseURL, cfg.MaxContentLength))

linkLoader := &LinkLoader{
baseURL: cfg.BaseURL,
maxContentLength: cfg.MaxContentLength,
customResolvers: customResolvers,
baseURL: cfg.BaseURL,
maxContentLength: cfg.MaxContentLength,
customResolvers: customResolvers,
contentTypeResolvers: contentTypeResolvers,
}
thumbnailLoader := &ThumbnailLoader{
baseURL: cfg.BaseURL,
Expand Down
9 changes: 9 additions & 0 deletions internal/resolvers/default/model.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
package defaultresolver

import (
"context"
"html"
"net/http"

"github.com/Chatterino/api/pkg/humanize"
"github.com/Chatterino/api/pkg/resolver"
)

// ContentTypeResolver produces a tooltip for a fetched link based on the
// Content-Type of the HTTP response rather than on the link's URL.
type ContentTypeResolver interface {
	// Name returns a label identifying the resolver, e.g. in error logs.
	Name() string

	// Check reports whether the resolver wants to handle a response carrying
	// the given Content-Type header value.
	Check(ctx context.Context, contentType string) bool

	// Run builds the resolver response from the request being served (req)
	// and the HTTP response fetched for the link (resp).
	Run(ctx context.Context, req *http.Request, resp *http.Response) (*resolver.Response, error)
}

type tooltipData struct {
URL string
Title string
Expand Down
110 changes: 110 additions & 0 deletions internal/resolvers/default/pdf_resolver.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package defaultresolver

import (
	"bytes"
	"context"
	"html"
	"html/template"
	"io"
	"mime"
	"net/http"
	"net/url"

	"github.com/Chatterino/api/internal/logger"
	"github.com/Chatterino/api/pkg/humanize"
	"github.com/Chatterino/api/pkg/resolver"
	"github.com/Chatterino/api/pkg/utils"
	"github.com/pdfcpu/pdfcpu/pkg/api"
	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu"
	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/validate"
)

// templateString is the HTML layout of the PDF tooltip. The Title and Author
// rows and the creation date are only emitted when the corresponding field is
// non-empty; the page counter uses the singular "page" for a one-page
// document and "pages" otherwise.
const templateString = `<div style="text-align: left;">
<b>PDF File</b><br>
{{if .Title}}<b>Title:</b> {{.Title}}<br>{{end}}
{{if .Author}}<b>Author:</b> {{.Author}}<br>{{end}}
<span style="color: #808892;">
{{.PageCount}} page{{if ne .PageCount 1}}s{{end}}{{if .CreationDate}}&nbsp;•&nbsp;{{.CreationDate}}{{end}}
</span>
</div>
`

// pdfTooltipTemplate is templateString parsed once at package initialization;
// template.Must panics at startup if the template text is malformed.
var pdfTooltipTemplate = template.Must(template.New("pdfTooltipTemplate").Parse(templateString))

// pdfTooltipData carries the values substituted into pdfTooltipTemplate.
type pdfTooltipData struct {
	// Title and Author are document metadata; the template skips the
	// corresponding row when the value is empty.
	Title, Author string

	// PageCount is the number of pages reported for the document.
	PageCount int

	// CreationDate is the already-formatted creation date, or "" when no
	// date is available, in which case the template omits it.
	CreationDate string
}

// PDFResolver is a ContentTypeResolver that generates tooltips for PDF
// documents.
type PDFResolver struct {
	// baseURL is passed to utils.FormatThumbnailURL when building the
	// thumbnail link of a response.
	baseURL string

	// maxContentLength is used as the Limit of the resolver.WriteLimiter
	// that observes the response body while it is read.
	maxContentLength uint64
}

// Check reports whether contentType denotes a PDF document.
//
// The header value is parsed as a media type, so parameters
// ("application/pdf; charset=binary"), surrounding whitespace and letter case
// do not prevent a match. An empty or unparseable value yields false.
func (r *PDFResolver) Check(ctx context.Context, contentType string) bool {
	const pdfMediaType = "application/pdf"

	// The error is deliberately ignored: when the media type itself cannot be
	// parsed ParseMediaType returns "", which never matches, and when only the
	// parameters are malformed it still returns the (lower-cased) media type,
	// which is all that matters here.
	mediaType, _, _ := mime.ParseMediaType(contentType)

	return mediaType == pdfMediaType
}

// Run reads the PDF contained in resp, extracts its metadata and renders it
// into a tooltip response.
//
// The whole body is buffered in memory because the PDF reader is given a
// bytes.Reader over it. Any failure to read, parse or validate the document,
// or to render the template, is returned as the error; read, parse and
// validation failures are also logged here.
func (r *PDFResolver) Run(ctx context.Context, req *http.Request, resp *http.Response) (*resolver.Response, error) {
	logr := logger.FromContext(ctx)

	// Every byte read from the body is also written to the limiter.
	// NOTE(review): presumably the limiter fails the read once more than
	// maxContentLength bytes have passed through - confirm against
	// resolver.WriteLimiter.
	sizeGuard := resolver.WriteLimiter{Limit: r.maxContentLength}
	body, err := io.ReadAll(io.TeeReader(resp.Body, &sizeGuard))
	if err != nil {
		logr.Errorw("error reading response body", "err", err)
		return nil, err
	}

	doc, err := api.ReadContext(bytes.NewReader(body), pdfcpu.NewDefaultConfiguration())
	if err != nil {
		logr.Errorw("error reading pdf context", "err", err)
		return nil, err
	}

	if err := validate.XRefTable(doc.XRefTable); err != nil {
		logr.Errorw("error validating XRefTable", "err", err)
		return nil, err
	}

	// The creation date stays empty when the PDF date string cannot be
	// parsed, which makes the template omit it.
	var created string
	if ts, ok := pdfcpu.DateTime(doc.CreationDate, true); ok {
		created = humanize.CreationDate(ts)
	}

	// NOTE(review): pdfTooltipTemplate is an html/template, which escapes
	// string values itself; pre-escaping Title and Author here may therefore
	// escape them twice (e.g. "&" rendered as "&amp;amp;") - confirm against
	// the rendered tooltip.
	data := pdfTooltipData{
		Title:        html.EscapeString(humanize.Title(doc.Title)),
		Author:       html.EscapeString(humanize.Title(doc.Author)),
		PageCount:    doc.PageCount,
		CreationDate: created,
	}

	var rendered bytes.Buffer
	err = pdfTooltipTemplate.Execute(&rendered, data)
	if err != nil {
		return nil, err
	}

	// Use the URL of the request that produced resp for both the link and
	// the thumbnail.
	finalURL := resp.Request.URL.String()

	return &resolver.Response{
		Status:    http.StatusOK,
		Link:      finalURL,
		Tooltip:   url.PathEscape(rendered.String()),
		Thumbnail: utils.FormatThumbnailURL(r.baseURL, req, finalURL),
	}, nil
}

// Name returns the fixed label identifying this resolver.
func (r *PDFResolver) Name() string {
	const resolverName = "PDFResolver"

	return resolverName
}

// NewPDFResolver returns a PDFResolver configured with the given base URL and
// maximum content length.
func NewPDFResolver(baseURL string, maxContentLength uint64) *PDFResolver {
	pdfResolver := new(PDFResolver)
	pdfResolver.baseURL = baseURL
	pdfResolver.maxContentLength = maxContentLength

	return pdfResolver
}
24 changes: 21 additions & 3 deletions pkg/thumbnail/thumbnail.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,19 @@ import (
)

var (
supportedThumbnails = []string{"image/jpeg", "image/png", "image/gif", "image/webp"}
animatedThumbnails = []string{"image/gif", "image/webp"}
supportedThumbnails = []string{
"image/jpeg",
"image/png",
"image/gif",
"image/webp",
"application/pdf",
}

// Subset of supportedThumbnails that should be treated as animated
animatedThumbnails = []string{
"image/gif",
"image/webp",
}

cfg config.APIConfig
)
Expand Down Expand Up @@ -58,7 +69,14 @@ func BuildStaticThumbnail(inputBuf []byte, resp *http.Response) ([]byte, error)
return []byte{}, fmt.Errorf("could not transform image from url: %s", resp.Request.URL)
}

outputBuf, _, err := image.ExportNative()
var outputBuf []byte
if image.Format() == vips.ImageTypePDF {
// Export thumbnails for PDF as PNG
outputBuf, _, err = image.ExportPng(vips.NewPngExportParams())
} else {
outputBuf, _, err = image.ExportNative()
}

if err != nil {
return []byte{}, fmt.Errorf("could not export image from url: %s", resp.Request.URL)
}
Expand Down

0 comments on commit 67a1371

Please sign in to comment.