diff --git a/CHANGELOG.md b/CHANGELOG.md index e740846b..1cb56eb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Breaking: Go version 1.17 is now the minimum required version to build this. (#292) - Breaking: Thumbnail generation now requires libvips. See [docs/build.md](./docs/build.md) for prerequisite instructions. (#366, #369) - Breaking: Resolver caches are now stored in PostgreSQL. See [docs/build.md](./docs/build.md) for prerequisite instructions. (#271) +- PDF: Generate customized tooltips for PDF files. (#374) - Twitter: Generate thumbnails with all images of a tweet. (#373) - YouTube: Added support for 'YouTube shorts' URLs. (#299) - Fix: SevenTV emotes now resolve correctly. (#281, #288, #307) diff --git a/go.mod b/go.mod index f9a153f5..0599255d 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( github.com/nicklaw5/helix v1.25.0 github.com/pashagolub/pgxmock v1.8.0 github.com/patrickmn/go-cache v2.1.0+incompatible + github.com/pdfcpu/pdfcpu v0.3.13 github.com/prometheus/client_golang v1.13.0 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.13.0 @@ -43,6 +44,8 @@ require ( github.com/googleapis/gax-go/v2 v2.5.1 // indirect github.com/hashicorp/golang-lru v0.5.5-0.20200511160909-eb529947af53 // indirect github.com/hashicorp/hcl v1.0.0 // indirect + github.com/hhrutter/lzw v0.0.0-20190829144645-6f07a24e8650 // indirect + github.com/hhrutter/tiff v0.0.0-20190829141212-736cae8d0bc7 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect github.com/jackc/pgio v1.0.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect @@ -58,6 +61,7 @@ require ( github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.0.5 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_model v0.2.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect @@ -71,7 +75,7 @@ require ( 
go.uber.org/multierr v1.6.0 // indirect golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa // indirect golang.org/x/exp/typeparams v0.0.0-20220722155223-a9213eeb770e // indirect - golang.org/x/image v0.0.0-20200927104501-e162460cd6b5 // indirect + golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb // indirect golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect golang.org/x/net v0.0.0-20221012135044-0b7e1fb9d458 // indirect golang.org/x/oauth2 v0.0.0-20221006150949-b44042a4b9c1 // indirect diff --git a/go.sum b/go.sum index e3855c37..7deaeba1 100644 --- a/go.sum +++ b/go.sum @@ -197,6 +197,11 @@ github.com/hashicorp/golang-lru v0.5.5-0.20200511160909-eb529947af53 h1:mcyf48Fj github.com/hashicorp/golang-lru v0.5.5-0.20200511160909-eb529947af53/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hhrutter/lzw v0.0.0-20190827003112-58b82c5a41cc/go.mod h1:yJBvOcu1wLQ9q9XZmfiPfur+3dQJuIhYQsMGLYcItZk= +github.com/hhrutter/lzw v0.0.0-20190829144645-6f07a24e8650 h1:1yY/RQWNSBjJe2GDCIYoLmpWVidrooriUr4QS/zaATQ= +github.com/hhrutter/lzw v0.0.0-20190829144645-6f07a24e8650/go.mod h1:yJBvOcu1wLQ9q9XZmfiPfur+3dQJuIhYQsMGLYcItZk= +github.com/hhrutter/tiff v0.0.0-20190829141212-736cae8d0bc7 h1:o1wMw7uTNyA58IlEdDpxIrtFHTgnvYzA8sCQz8luv94= +github.com/hhrutter/tiff v0.0.0-20190829141212-736cae8d0bc7/go.mod h1:WkUxfS2JUu3qPo6tRld7ISb8HiC0gVSU91kooBMDVok= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= @@ -306,6 +311,8 @@ github.com/pashagolub/pgxmock v1.8.0 
h1:05JB+jng7yPdeC6i04i8TC4H1Kr7TfcFeQyf4JP6 github.com/pashagolub/pgxmock v1.8.0/go.mod h1:kDkER7/KJdD3HQjNvFw5siwR7yREKmMvwf8VhAgTK5o= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= +github.com/pdfcpu/pdfcpu v0.3.13 h1:VFon2Yo1PJt+sA57vPAeXWGLSZ7Ux3Jl4h02M0+s3dg= +github.com/pdfcpu/pdfcpu v0.3.13/go.mod h1:UJc5xsXg0fpmjp1zOPdyYcAQArc/Zf3V0nv5URe+9fg= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.0.5 h1:ipoSadvV8oGUjnUbMub59IDPPwfxF694nG/jwbMiyQg= @@ -442,8 +449,10 @@ golang.org/x/exp/typeparams v0.0.0-20220722155223-a9213eeb770e h1:7Xs2YCOpMlNqSQ golang.org/x/exp/typeparams v0.0.0-20220722155223-a9213eeb770e/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20200927104501-e162460cd6b5 h1:QelT11PB4FXiDEXucrfNckHoFxwt8USGY1ajP1ZF5lM= +golang.org/x/image v0.0.0-20190823064033-3a9bac650e44/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.0.0-20200927104501-e162460cd6b5/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb h1:fqpd0EBDzlHRCjiphRR5Zo/RSWWQlWv34418dnEixWk= +golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint 
v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= diff --git a/internal/resolvers/default/link_loader.go b/internal/resolvers/default/link_loader.go index dfe56c9f..54ea5f3e 100644 --- a/internal/resolvers/default/link_loader.go +++ b/internal/resolvers/default/link_loader.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/Chatterino/api/internal/logger" "github.com/Chatterino/api/internal/staticresponse" "github.com/Chatterino/api/pkg/cache" "github.com/Chatterino/api/pkg/resolver" @@ -21,9 +22,10 @@ import ( ) type LinkLoader struct { - baseURL string - customResolvers []resolver.Resolver - maxContentLength uint64 + baseURL string + customResolvers []resolver.Resolver + contentTypeResolvers []ContentTypeResolver + maxContentLength uint64 } func (l *LinkLoader) defaultTooltipData(doc *goquery.Document, r *http.Request, resp *http.Response) tooltipData { @@ -39,6 +41,8 @@ func (l *LinkLoader) defaultTooltipData(doc *goquery.Document, r *http.Request, } func (l *LinkLoader) Load(ctx context.Context, urlString string, r *http.Request) ([]byte, *int, *string, time.Duration, error) { + log := logger.FromContext(ctx) + requestUrl, err := url.Parse(urlString) if err != nil { return resolver.ReturnInvalidURL() @@ -91,8 +95,28 @@ func (l *LinkLoader) Load(ctx context.Context, urlString string, r *http.Request return staticresponse.SNoLinkInfoFound.Return() } - limiter := &resolver.WriteLimiter{Limit: l.maxContentLength} + contentType := resp.Header.Get("Content-Type") + for _, ctResolver := range l.contentTypeResolvers { + if ctResolver.Check(ctx, contentType) { + ttResponse, err := ctResolver.Run(ctx, r, resp) + if err != nil { + log.Errorw("error running ContentTypeResolver", + "resolver", ctResolver.Name(), + "err", err, + ) + + return utils.MarshalNoDur(&resolver.Response{ + Status: http.StatusInternalServerError, + Message: "ContentTypeResolver error " + resolver.CleanResponse(err.Error()), + }) + } + return 
utils.MarshalNoDur(ttResponse) + } + } + + // Fallback to parsing via goquery + limiter := &resolver.WriteLimiter{Limit: l.maxContentLength} doc, err := goquery.NewDocumentFromReader(io.TeeReader(resp.Body, limiter)) if err != nil { return utils.MarshalNoDur(&resolver.Response{ @@ -124,7 +148,7 @@ func (l *LinkLoader) Load(ctx context.Context, urlString string, r *http.Request Thumbnail: data.ImageSrc, } - if thumbnail.IsSupportedThumbnailType(resp.Header.Get("content-type")) { + if thumbnail.IsSupportedThumbnailType(contentType) { response.Thumbnail = utils.FormatThumbnailURL(l.baseURL, r, resp.Request.URL.String()) } diff --git a/internal/resolvers/default/link_resolver.go b/internal/resolvers/default/link_resolver.go index 7d63d820..74ae6678 100644 --- a/internal/resolvers/default/link_resolver.go +++ b/internal/resolvers/default/link_resolver.go @@ -227,10 +227,14 @@ func New(ctx context.Context, cfg config.APIConfig, pool db.Pool, helixClient *h youtube.Initialize(ctx, cfg, pool, &customResolvers) seventv.Initialize(ctx, cfg, pool, &customResolvers) + contentTypeResolvers := []ContentTypeResolver{} + contentTypeResolvers = append(contentTypeResolvers, NewPDFResolver(cfg.BaseURL, cfg.MaxContentLength)) + linkLoader := &LinkLoader{ - baseURL: cfg.BaseURL, - maxContentLength: cfg.MaxContentLength, - customResolvers: customResolvers, + baseURL: cfg.BaseURL, + maxContentLength: cfg.MaxContentLength, + customResolvers: customResolvers, + contentTypeResolvers: contentTypeResolvers, } thumbnailLoader := &ThumbnailLoader{ baseURL: cfg.BaseURL, diff --git a/internal/resolvers/default/model.go b/internal/resolvers/default/model.go index 2d64ca43..7b905521 100644 --- a/internal/resolvers/default/model.go +++ b/internal/resolvers/default/model.go @@ -1,11 +1,20 @@ package defaultresolver import ( + "context" "html" + "net/http" "github.com/Chatterino/api/pkg/humanize" + "github.com/Chatterino/api/pkg/resolver" ) +type ContentTypeResolver interface { + Check(ctx 
context.Context, contentType string) bool + Run(ctx context.Context, req *http.Request, resp *http.Response) (*resolver.Response, error) + Name() string +} + type tooltipData struct { URL string Title string diff --git a/internal/resolvers/default/pdf_resolver.go b/internal/resolvers/default/pdf_resolver.go new file mode 100644 index 00000000..702bec38 --- /dev/null +++ b/internal/resolvers/default/pdf_resolver.go @@ -0,0 +1,110 @@ +package defaultresolver + +import ( + "bytes" + "context" + "html" + "html/template" + "io" + "net/http" + "net/url" + + "github.com/Chatterino/api/internal/logger" + "github.com/Chatterino/api/pkg/humanize" + "github.com/Chatterino/api/pkg/resolver" + "github.com/Chatterino/api/pkg/utils" + "github.com/pdfcpu/pdfcpu/pkg/api" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/validate" +) + +const templateString = `
+PDF File
+{{if .Title}}Title: {{.Title}}
{{end}} +{{if .Author}}Author: {{.Author}}
{{end}} + +{{.PageCount}} pages{{if .CreationDate}} • {{.CreationDate}}{{end}} + +
+`
+
+// pdfTooltipTemplate is templateString parsed once at package initialization.
+// template.Must panics if the template text does not parse.
+var pdfTooltipTemplate = template.Must(template.New("pdfTooltipTemplate").Parse(templateString))
+
+// pdfTooltipData carries the values rendered by pdfTooltipTemplate.
+//
+// Title and Author are template.HTML rather than string: Run escapes them
+// with html.EscapeString, and html/template escapes plain strings a second
+// time, which would turn "&" into "&amp;amp;". template.HTML values are
+// emitted verbatim, so the text ends up escaped exactly once.
+type pdfTooltipData struct {
+	Title        template.HTML
+	Author       template.HTML
+	PageCount    int
+	CreationDate string
+}
+
+// PDFResolver builds tooltips for responses whose Content-Type is
+// application/pdf.
+type PDFResolver struct {
+	// baseURL is passed to utils.FormatThumbnailURL when building the
+	// thumbnail link.
+	baseURL string
+	// maxContentLength is the Limit handed to resolver.WriteLimiter while the
+	// response body is read.
+	maxContentLength uint64
+}
+
+// Check reports whether contentType is exactly "application/pdf".
+//
+// The comparison is a plain string equality, so a header value that carries
+// parameters (for example "application/pdf; charset=binary") or different
+// letter case is not matched.
+func (r *PDFResolver) Check(ctx context.Context, contentType string) bool {
+	return contentType == "application/pdf"
+}
+
+// Run reads the PDF from resp.Body, extracts its title, author, page count
+// and creation date, and returns a resolver.Response whose Tooltip is the
+// rendered, path-escaped tooltip template and whose Thumbnail points at the
+// thumbnail URL for the final request URL.
+//
+// An error is returned when the body cannot be read, when pdfcpu cannot parse
+// or validate the document, or when the template fails to execute. Read and
+// pdfcpu errors are also logged here before being returned.
+func (r *PDFResolver) Run(ctx context.Context, req *http.Request, resp *http.Response) (*resolver.Response, error) {
+	log := logger.FromContext(ctx)
+
+	// Every byte read from the body is also written to the limiter.
+	// NOTE(review): presumably WriteLimiter fails the write once Limit is
+	// exceeded, which io.TeeReader surfaces as a read error - confirm against
+	// resolver.WriteLimiter.
+	limiter := resolver.WriteLimiter{Limit: r.maxContentLength}
+	limitedReader := io.TeeReader(resp.Body, &limiter)
+	buffer, err := io.ReadAll(limitedReader)
+	if err != nil {
+		log.Errorw("error reading response body", "err", err)
+		return nil, err
+	}
+
+	// The whole document is buffered so pdfcpu gets a reader backed by
+	// memory instead of the network stream.
+	readSeeker := bytes.NewReader(buffer)
+
+	pdfCtx, err := api.ReadContext(readSeeker, pdfcpu.NewDefaultConfiguration())
+	if err != nil {
+		log.Errorw("error reading pdf context", "err", err)
+		return nil, err
+	}
+
+	if err = validate.XRefTable(pdfCtx.XRefTable); err != nil {
+		log.Errorw("error validating XRefTable", "err", err)
+		return nil, err
+	}
+
+	// The creation date stays empty when pdfcpu cannot parse it; the template
+	// then omits it.
+	dtString := ""
+	if creationDt, ok := pdfcpu.DateTime(pdfCtx.CreationDate, true); ok {
+		dtString = humanize.CreationDate(creationDt)
+	}
+
+	// Title and Author come from the document itself and are untrusted.
+	// They are escaped here and wrapped in template.HTML so html/template
+	// does not escape them again.
+	ttData := pdfTooltipData{
+		Title:        template.HTML(html.EscapeString(humanize.Title(pdfCtx.Title))),
+		Author:       template.HTML(html.EscapeString(humanize.Title(pdfCtx.Author))),
+		PageCount:    pdfCtx.PageCount,
+		CreationDate: dtString,
+	}
+
+	var tooltip bytes.Buffer
+	if err := pdfTooltipTemplate.Execute(&tooltip, ttData); err != nil {
+		return nil, err
+	}
+
+	// Use the URL of the request that produced resp, not the originally
+	// requested one.
+	targetURL := resp.Request.URL.String()
+	response := &resolver.Response{
+		Status:    http.StatusOK,
+		Link:      targetURL,
+		Tooltip:   url.PathEscape(tooltip.String()),
+		Thumbnail: utils.FormatThumbnailURL(r.baseURL, req, targetURL),
+	}
+
+	return response, nil
+}
+
+// Name returns the resolver's name, "PDFResolver".
+func (r *PDFResolver) Name() string {
+	return "PDFResolver"
+}
+
+// NewPDFResolver returns a PDFResolver configured with the given base URL and
+// maximum content length.
+func
NewPDFResolver(baseURL string, maxContentLength uint64) *PDFResolver { + return &PDFResolver{ + baseURL: baseURL, + maxContentLength: maxContentLength, + } +} diff --git a/pkg/thumbnail/thumbnail.go b/pkg/thumbnail/thumbnail.go index 441c7687..9a3a748f 100644 --- a/pkg/thumbnail/thumbnail.go +++ b/pkg/thumbnail/thumbnail.go @@ -10,8 +10,19 @@ import ( ) var ( - supportedThumbnails = []string{"image/jpeg", "image/png", "image/gif", "image/webp"} - animatedThumbnails = []string{"image/gif", "image/webp"} + supportedThumbnails = []string{ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", + "application/pdf", + } + + // Subset of supportedThumbnails that should be treated as animated + animatedThumbnails = []string{ + "image/gif", + "image/webp", + } cfg config.APIConfig ) @@ -58,7 +69,14 @@ func BuildStaticThumbnail(inputBuf []byte, resp *http.Response) ([]byte, error) return []byte{}, fmt.Errorf("could not transform image from url: %s", resp.Request.URL) } - outputBuf, _, err := image.ExportNative() + var outputBuf []byte + if image.Format() == vips.ImageTypePDF { + // Export thumbnails for PDF as PNG + outputBuf, _, err = image.ExportPng(vips.NewPngExportParams()) + } else { + outputBuf, _, err = image.ExportNative() + } + if err != nil { return []byte{}, fmt.Errorf("could not export image from url: %s", resp.Request.URL) }