Skip to content

Commit

Permalink
feat: smarter image url detect
Browse files Browse the repository at this point in the history
  • Loading branch information
JingYiJun committed Mar 29, 2024
1 parent b9fe56b commit 390ef6f
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 40 deletions.
6 changes: 3 additions & 3 deletions utils/sensitive/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type ResponseForCheck struct {
}

func CheckSensitive(params ParamsForCheck) (resp *ResponseForCheck, err error) {
images := detect(params.Content)
images, clearContent := findImagesInMarkdownContent(params.Content)
if len(images) != 0 {
for _, img := range images {
pass, err := checkValidUrl(img)
Expand Down Expand Up @@ -62,13 +62,13 @@ func CheckSensitive(params ParamsForCheck) (resp *ResponseForCheck, err error) {
}
}

params.Content = deleteImagesInMarkdown(params.Content)
if hasTextUrl(params.Content) {
if hasTextUrl(clearContent) {
return &ResponseForCheck{
Pass: false,
Labels: nil,
}, nil
}
params.Content = clearContent

return CheckSensitiveText(params)
}
Expand Down
54 changes: 17 additions & 37 deletions utils/sensitive/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,30 @@ import (
"mvdan.cc/xurls/v2"
imageUrl "net/url"
"regexp"
"strings"
"treehole_next/config"
)

var imageRegex = regexp.MustCompile(
`!\[.*?]\(([^" )]*)`,
`!\[(.*?)]\(([^" )]*?)\s*(".*?")?\)`,
)
var deleteImageRegex = regexp.MustCompile(
`!\[(.*?)]\(([^" ]*)( ".*")?\)`,
)

// findImagesInMarkdown scans the Markdown text with imageRegex and returns the
// first capture group of every match (the image link). The result is an empty,
// non-nil slice when nothing matches.
func findImagesInMarkdown(markdown string) []string {
	found := imageRegex.FindAllStringSubmatch(markdown, -1)

	// One output slot per match; fill by index instead of appending.
	links := make([]string, len(found))
	for i := range found {
		links[i] = found[i][1]
	}
	return links
}

func detect(markdownText string) []string {
// findImagesInMarkdownContent finds every image link in the Markdown text and also returns the text with those image links removed.
func findImagesInMarkdownContent(content string) (imageUrls []string, clearContent string) {

var ret []string
images := findImagesInMarkdown(markdownText)
for _, image := range images {
ret = append(ret, image)
}

return ret
clearContent = imageRegex.ReplaceAllStringFunc(content, func(s string) string {
submatch := imageRegex.FindStringSubmatch(s)
altText := submatch[1]
imageUrls = append(imageUrls, submatch[2])
if len(submatch) > 3 && submatch[3] != "" {
// If there is a title, return it along with the alt text
title := strings.Trim(submatch[3], "\"")
return altText + " " + title
}
// If there is no title, return the alt text
return altText
})
return
}

func checkType(params ParamsForCheck) bool {
Expand All @@ -60,17 +54,3 @@ func checkValidUrl(input string) (bool, error) {
}
return true, nil
}

// deleteImagesInMarkdown replaces every span of markdown matched by imageRegex
// with plain text: the image's alt text, followed by a space and the title
// when a title is present.
//
// Each matched span is re-parsed with deleteImageRegex to extract the alt text
// and the optional title. Because the span is selected by one regex and parsed
// by another, the parse can fail; such a span is returned unchanged instead of
// indexing into a nil submatch slice and panicking.
func deleteImagesInMarkdown(markdown string) string {
	return imageRegex.ReplaceAllStringFunc(markdown, func(s string) string {
		submatches := deleteImageRegex.FindStringSubmatch(s)
		// FindStringSubmatch returns nil when s does not match; leave the
		// span as it is in that case.
		if len(submatches) < 4 {
			return s
		}

		altText := submatches[1]
		if title := submatches[3]; title != "" {
			// deleteImageRegex captures the title as ` "title"`: strip the
			// leading space and quote and the trailing quote.
			return altText + " " + title[2:len(title)-1]
		}
		// No title: only the alt text remains.
		return altText
	})
}
46 changes: 46 additions & 0 deletions utils/sensitive/utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package sensitive

import (
"github.com/stretchr/testify/assert"
"testing"
)

// TestFindImagesInMarkdown runs findImagesInMarkdownContent over a table of
// Markdown snippets and checks both the extracted image URLs and the text left
// after the image syntax has been replaced.
func TestFindImagesInMarkdown(t *testing.T) {
	cases := []struct {
		input    string   // Markdown passed to findImagesInMarkdownContent
		wantText string   // expected text after image removal
		wantURLs []string // expected image URLs, in order of appearance
	}{
		// Single image without a title.
		{
			input:    `![image1](https://example.com/image1)`,
			wantText: `image1`,
			wantURLs: []string{"https://example.com/image1"},
		},
		// Two images without titles.
		{
			input:    `![image1](https://example.com/image1) ![image2](https://example.com/image2)`,
			wantText: `image1 image2`,
			wantURLs: []string{"https://example.com/image1", "https://example.com/image2"},
		},
		// Two images, each with a quoted title.
		{
			input:    `![image1](https://example.com/image1 "title1") ![image2](https://example.com/image2 "title2")`,
			wantText: `image1 title1 image2 title2`,
			wantURLs: []string{"https://example.com/image1", "https://example.com/image2"},
		},
	}

	for _, tc := range cases {
		gotURLs, gotText := findImagesInMarkdownContent(tc.input)
		assert.EqualValues(t, tc.wantText, gotText, "cleanText should be equal")
		assert.EqualValues(t, tc.wantURLs, gotURLs, "imageUrls should be equal")
	}
}

0 comments on commit 390ef6f

Please sign in to comment.