From d961c58092572a96a2d52ae18f29327c70dd85f1 Mon Sep 17 00:00:00 2001 From: Dmitry Verkhoturov Date: Mon, 17 Jul 2023 22:50:36 +0200 Subject: [PATCH] remove proxied images from sanity check Previously, proxied and local images were checked for presence in the storage before previewing or posting the comment. That logic resulted in an inability to post with an image when a proxy for images is enabled, as proxied images are not downloaded to disk before the first time someone loads them, which could only happen after the user either previews or posts the message. After this change, preview and post only checks the local images' presence and ignore the proxied ones. --- backend/app/rest/api/rest_private.go | 10 +-- backend/app/rest/api/rest_private_test.go | 84 ++++++++++++++++++++++- backend/app/rest/api/rest_public_test.go | 8 +-- backend/app/store/image/image.go | 82 ++++++++++++---------- backend/app/store/image/image_test.go | 3 +- backend/app/store/service/service.go | 2 +- 6 files changed, 142 insertions(+), 47 deletions(-) diff --git a/backend/app/rest/api/rest_private.go b/backend/app/rest/api/rest_private.go index f193023293..f7e9f6f5cd 100644 --- a/backend/app/rest/api/rest_private.go +++ b/backend/app/rest/api/rest_private.go @@ -90,11 +90,11 @@ func (s *private) previewCommentCtrl(w http.ResponseWriter, r *http.Request) { comment = s.commentFormatter.Format(comment) comment.Sanitize() - // check if images are valid - for _, id := range s.imageService.ExtractPictures(comment.Text) { + // check if images are valid, omit proxied images as they are lazy-loaded + for _, id := range s.imageService.ExtractNonProxiedPictures(comment.Text) { err := s.imageService.ResetCleanupTimer(id) if err != nil { - rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "can't renew staged picture cleanup timer", rest.ErrImgNotFound) + rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "can't load picture from the comment", rest.ErrImgNotFound) return } } @@ -129,8 +129,8 @@ func (s *private) createCommentCtrl(w http.ResponseWriter, r *http.Request) { } comment = s.commentFormatter.Format(comment) - // check if images are valid - for _, id := range s.imageService.ExtractPictures(comment.Text) { + // check if images are valid, omit proxied images as they are lazy-loaded + for _, id := range s.imageService.ExtractNonProxiedPictures(comment.Text) { _, err := s.imageService.Load(id) if err != nil { rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "can't load picture from the comment", rest.ErrImgNotFound) diff --git a/backend/app/rest/api/rest_private_test.go b/backend/app/rest/api/rest_private_test.go index 6537519b9f..df50100332 100644 --- a/backend/app/rest/api/rest_private_test.go +++ b/backend/app/rest/api/rest_private_test.go @@ -10,6 +10,7 @@ import ( "io" "mime/multipart" "net/http" + "net/http/httptest" "os" "strings" "testing" @@ -24,6 +25,7 @@ import ( "github.com/stretchr/testify/require" "github.com/umputun/remark42/backend/app/notify" + "github.com/umputun/remark42/backend/app/rest/proxy" "github.com/umputun/remark42/backend/app/store" "github.com/umputun/remark42/backend/app/store/image" ) @@ -69,7 +71,7 @@ func TestRest_CreateFilteredCode(t *testing.T) { c := R.JSON{} err = json.Unmarshal(b, &c) - assert.NoError(t, err) + require.NoError(t, err, string(b)) loc := c["locator"].(map[string]interface{}) assert.Equal(t, "remark42", loc["site"]) assert.Equal(t, "https://radio-t.com/blah1", loc["url"]) @@ -79,6 +81,86 @@ func TestRest_CreateFilteredCode(t *testing.T) { assert.True(t, len(c["id"].(string)) > 8) } +// based on issue https://github.com/umputun/remark42/issues/1631 +func TestRest_CreateAndPreviewWithImage(t *testing.T) { + ts, srv, teardown := startupT(t) + ts.Close() + defer teardown() + + srv.ImageService.ProxyAPI = srv.RemarkURL + "/api/v1/img" + srv.ImageProxy = &proxy.Image{ + HTTP2HTTPS: true, + CacheExternal: true, + RoutePath: "/api/v1/img", + RemarkURL: srv.RemarkURL, + ImageService: srv.ImageService, + } + srv.CommentFormatter = store.NewCommentFormatter(srv.ImageProxy) + // need to recreate the server with new ImageProxy, otherwise old one will be used + ts = httptest.NewServer(srv.routes()) + defer ts.Close() + + var pngRead bool + // server with the test PNG image + pngServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, e := io.Copy(w, gopherPNG()) + assert.NoError(t, e) + pngRead = true + })) + defer pngServer.Close() + + t.Run("create", func(t *testing.T) { + resp, err := post(t, ts.URL+"/api/v1/comment", + `{"text": "![](`+pngServer.URL+`/gopher.png)", "locator":{"url": "https://radio-t.com/blah1", "site": "remark42"}}`) + assert.NoError(t, err) + b, err := io.ReadAll(resp.Body) + assert.NoError(t, err) + require.Equal(t, http.StatusCreated, resp.StatusCode, string(b)) + assert.NoError(t, resp.Body.Close()) + + c := R.JSON{} + err = json.Unmarshal(b, &c) + require.NoError(t, err, string(b)) + assert.NotContains(t, c["text"], pngServer.URL) + assert.Contains(t, c["text"], srv.RemarkURL) + loc := c["locator"].(map[string]interface{}) + assert.Equal(t, "remark42", loc["site"]) + assert.Equal(t, "https://radio-t.com/blah1", loc["url"]) + assert.True(t, len(c["id"].(string)) > 8) + assert.Equal(t, false, pngRead, "original image is not yet accessed by server") + }) + + t.Run("preview", func(t *testing.T) { + resp, err := post(t, ts.URL+"/api/v1/preview", + `{"text": "![](`+pngServer.URL+`/gopher.png)", "locator":{"url": "https://radio-t.com/blah1", "site": "remark42"}}`) + assert.NoError(t, err) + b, err := io.ReadAll(resp.Body) + assert.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode, string(b)) + assert.NoError(t, resp.Body.Close()) + + assert.NotContains(t, string(b), pngServer.URL) + assert.Contains(t, string(b), srv.RemarkURL) + + assert.Equal(t, false, pngRead, "original image is not yet accessed by server") + // retrieve the image from the cache + imgURL := strings.Split(strings.Split(string(b), "src=\"")[1], "\"")[0] + // replace srv.RemarkURL with ts.URL + imgURL = strings.ReplaceAll(imgURL, srv.RemarkURL, ts.URL) + resp, err = http.Get(imgURL) + assert.NoError(t, err) + b, err = io.ReadAll(resp.Body) + assert.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode, string(b)) + assert.NoError(t, resp.Body.Close()) + // compare image to original gopher png after decoding from base64 + assert.Equal(t, gopher, base64.StdEncoding.EncodeToString(b)) + + assert.Equal(t, true, pngRead, "original image accessed to be shown to user") + }) + +} + func TestRest_CreateOldPost(t *testing.T) { ts, srv, teardown := startupT(t) defer teardown() diff --git a/backend/app/rest/api/rest_public_test.go b/backend/app/rest/api/rest_public_test.go index 32358c146f..4f307fd0fc 100644 --- a/backend/app/rest/api/rest_public_test.go +++ b/backend/app/rest/api/rest_public_test.go @@ -76,8 +76,8 @@ func TestRest_Preview(t *testing.T) { assert.NoError(t, resp.Body.Close()) assert.Contains(t, string(b), - "{\"code\":20,\"details\":\"can't renew staged picture cleanup timer\","+ - "\"error\":\"can't get image stats for dev_user/bad_picture: stat", + `{"code":20,"details":"can't load picture from the comment",`+ + `"error":"can't get image stats for dev_user/bad_picture: stat`, ) assert.Contains(t, string(b), @@ -97,8 +97,8 @@ func TestRest_PreviewWithWrongImage(t *testing.T) { assert.NoError(t, resp.Body.Close()) assert.Contains(t, string(b), - "{\"code\":20,\"details\":\"can't renew staged picture cleanup timer\","+ - "\"error\":\"can't get image stats for dev_user/bad_picture: stat ", + `{"code":20,"details":"can't load picture from the comment",`+ + `"error":"can't get image stats for dev_user/bad_picture: stat `, ) assert.Contains(t, string(b), diff --git a/backend/app/store/image/image.go b/backend/app/store/image/image.go index dc68312b1a..294206292a 100644 --- a/backend/app/store/image/image.go +++ b/backend/app/store/image/image.go @@ -88,8 +88,8 @@ func NewService(s Store, p ServiceParams) *Service { return &Service{ServiceParams: p, store: s} } -// SubmitAndCommit multiple ids immediately -func (s *Service) SubmitAndCommit(idsFn func() []string) error { +// Commit multiple ids immediately +func (s *Service) Commit(idsFn func() []string) error { errs := new(multierror.Error) for _, id := range idsFn() { err := s.store.Commit(id) @@ -117,7 +117,7 @@ func (s *Service) Submit(idsFn func() []string) { for atomic.LoadInt32(&s.term) == 0 && time.Since(req.TS) <= s.EditDuration { time.Sleep(time.Millisecond * 10) // small sleep to relive busy wait but keep reactive for term (close) } - err := s.SubmitAndCommit(req.idsFn) + err := s.Commit(req.idsFn) if err != nil { log.Printf("[WARN] image commit error %v", err) } @@ -141,39 +141,14 @@ func (s *Service) Submit(idsFn func() []string) { // ExtractPictures gets list of images from the doc html and convert from urls to ids, i.e. user/pic.png func (s *Service) ExtractPictures(commentHTML string) (ids []string) { - doc, err := goquery.NewDocumentFromReader(strings.NewReader(commentHTML)) - if err != nil { - log.Printf("[ERROR] can't parse commentHTML to parse images: %q, error: %v", commentHTML, err) - return nil - } - doc.Find("img").Each(func(i int, sl *goquery.Selection) { - if im, ok := sl.Attr("src"); ok { - if strings.Contains(im, s.ImageAPI) { - elems := strings.Split(im, "/") - if len(elems) >= 2 { - id := elems[len(elems)-2] + "/" + elems[len(elems)-1] - ids = append(ids, id) - } - } - if strings.Contains(im, s.ProxyAPI) { - proxiedURL, err := url.Parse(im) - if err != nil { - return - } - imgURL, err := base64.URLEncoding.DecodeString(proxiedURL.Query().Get("src")) - if err != nil { - return - } - imgID, err := CachedImgID(string(imgURL)) - if err != nil { - return - } - ids = append(ids, imgID) - } - } - }) + return s.extractImageIDs(commentHTML, true) +} - return ids +// ExtractNonProxiedPictures gets list of non-proxied images from the doc html and convert from urls to ids, i.e. user/pic.png +// This method is used in image check on post preview and load, as proxied images have lazy loading +// and wouldn't be present on disk but still valid as they will be loaded the first time someone requests them. +func (s *Service) ExtractNonProxiedPictures(commentHTML string) (ids []string) { + return s.extractImageIDs(commentHTML, false) } // Cleanup runs periodic cleanup with 1.5*ServiceParams.EditDuration. Blocking loop, should be called inside of goroutine by consumer @@ -262,6 +237,43 @@ func (s *Service) ImgContentType(img []byte) string { return contentType } +// returns list of image IDs from the comment html, including proxied images if includeProxied is true +func (s *Service) extractImageIDs(commentHTML string, includeProxied bool) (ids []string) { + doc, err := goquery.NewDocumentFromReader(strings.NewReader(commentHTML)) + if err != nil { + log.Printf("[ERROR] can't parse commentHTML to parse images: %q, error: %v", commentHTML, err) + return nil + } + doc.Find("img").Each(func(i int, sl *goquery.Selection) { + if im, ok := sl.Attr("src"); ok { + if strings.Contains(im, s.ImageAPI) { + elems := strings.Split(im, "/") + if len(elems) >= 2 { + id := elems[len(elems)-2] + "/" + elems[len(elems)-1] + ids = append(ids, id) + } + } + if includeProxied && strings.Contains(im, s.ProxyAPI) { + proxiedURL, err := url.Parse(im) + if err != nil { + return + } + imgURL, err := base64.URLEncoding.DecodeString(proxiedURL.Query().Get("src")) + if err != nil { + return + } + imgID, err := CachedImgID(string(imgURL)) + if err != nil { + return + } + ids = append(ids, imgID) + } + } + }) + + return ids +} + // prepareImage calls readAndValidateImage and resize on provided image. func (s *Service) prepareImage(r io.Reader) ([]byte, error) { data, err := readAndValidateImage(r, s.MaxSize) diff --git a/backend/app/store/image/image_test.go b/backend/app/store/image/image_test.go index 7c9483b5ec..7ca85b1402 100644 --- a/backend/app/store/image/image_test.go +++ b/backend/app/store/image/image_test.go @@ -105,6 +105,7 @@ func TestService_ExtractPictures(t *testing.T) { ids = svc.ExtractPictures(html) require.Equal(t, 1, len(ids), "one image in") assert.Equal(t, "cached_images/12318fbd4c55e9d177b8b5ae197bc89c5afd8e07-a41fcb00643f28d700504256ec81cbf2e1aac53e", ids[0]) + require.Empty(t, svc.ExtractNonProxiedPictures(html), "no non-proxied images expected to be found") // bad url html = `` @@ -150,7 +151,7 @@ func TestService_Submit(t *testing.T) { svc := NewService(&store, ServiceParams{ImageAPI: "/blah/", EditDuration: time.Millisecond * 100}) svc.Submit(func() []string { return []string{"id1", "id2", "id3"} }) assert.Equal(t, 3, len(store.ResetCleanupTimerCalls())) - err := svc.SubmitAndCommit(func() []string { return []string{"id4", "id5"} }) + err := svc.Commit(func() []string { return []string{"id4", "id5"} }) assert.NoError(t, err) svc.Submit(func() []string { return []string{"id6", "id7"} }) assert.Equal(t, 5, len(store.ResetCleanupTimerCalls())) diff --git a/backend/app/store/service/service.go b/backend/app/store/service/service.go index e03ccc2ad7..5503712b52 100644 --- a/backend/app/store/service/service.go +++ b/backend/app/store/service/service.go @@ -279,7 +279,7 @@ func (s *DataStore) submitImages(comment store.Comment) { var err error if comment.Imported { - err = s.ImageService.SubmitAndCommit(idsFn) + err = s.ImageService.Commit(idsFn) } else { s.ImageService.Submit(idsFn) }