From 4752d40cf356e8e165635655ee7f028d81b6e34f Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Dec 2021 18:51:00 +0800 Subject: [PATCH 1/5] Fix a panic in NotifyCreateIssueComment (caused by string truncation) --- models/repo_archiver.go | 1 - modules/notification/action/action.go | 13 ++++---- modules/util/truncate.go | 44 +++++++++++++++++++++---- modules/util/truncate_test.go | 46 +++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 14 deletions(-) create mode 100644 modules/util/truncate_test.go diff --git a/models/repo_archiver.go b/models/repo_archiver.go index 2369a16108038..9363d09574b02 100644 --- a/models/repo_archiver.go +++ b/models/repo_archiver.go @@ -6,7 +6,6 @@ package models import ( "code.gitea.io/gitea/models/db" - repo_model "code.gitea.io/gitea/models/repo" ) diff --git a/modules/notification/action/action.go b/modules/notification/action/action.go index 80f97ad9932c9..d5007fbbf3084 100644 --- a/modules/notification/action/action.go +++ b/modules/notification/action/action.go @@ -15,6 +15,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/notification/base" "code.gitea.io/gitea/modules/repository" + "code.gitea.io/gitea/modules/util" ) type actionNotifier struct { @@ -100,12 +101,12 @@ func (a *actionNotifier) NotifyCreateIssueComment(doer *user_model.User, repo *m IsPrivate: issue.Repo.IsPrivate, } - content := "" - - if len(comment.Content) > 200 { - content = comment.Content[:strings.LastIndex(comment.Content[0:200], " ")] + "…" - } else { - content = comment.Content + content, truncatedRight := util.SplitStringAtRuneN(comment.Content, 200) + if truncatedRight != "" { + // in case the content is in a Latin family language, we remove the last broken word. + if lastSpaceIdx := strings.LastIndex(content, " "); lastSpaceIdx != -1 && (len(content)-lastSpaceIdx < 15) { + content = comment.Content[:lastSpaceIdx] + "…" + } } act.Content = fmt.Sprintf("%d|%s", issue.Index, content) diff --git a/modules/util/truncate.go b/modules/util/truncate.go index 8d0f6309732d7..87638884331dd 100644 --- a/modules/util/truncate.go +++ b/modules/util/truncate.go @@ -6,20 +6,22 @@ package util import "unicode/utf8" +// in UTF8 "…" is 3 bytes so doesn't really gain us anything... +const utf8Ellipsis = "…" +const asciiEllipsis = "..." + // SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.) func SplitStringAtByteN(input string, n int) (left, right string) { if len(input) <= n { - left = input - return + return input, "" } if !utf8.ValidString(input) { - left = input[:n-3] + "..." - right = "..." + input[n-3:] + left = input[:n-3] + asciiEllipsis + right = asciiEllipsis + input[n-3:] return } - // in UTF8 "…" is 3 bytes so doesn't really gain us anything... end := 0 for end <= n-3 { _, size := utf8.DecodeRuneInString(input[end:]) @@ -29,7 +31,35 @@ func SplitStringAtByteN(input string, n int) (left, right string) { end += size } - left = input[:end] + "…" - right = "…" + input[end:] + left = input[:end] + utf8Ellipsis + right = utf8Ellipsis + input[end:] + return +} + +// SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.) +func SplitStringAtRuneN(input string, n int) (left, right string) { + if !utf8.ValidString(input) { + if len(input) <= n { + return input, "" + } + left = input[:n-3] + asciiEllipsis + right = asciiEllipsis + input[n-3:] + return + } + + if utf8.RuneCountInString(input) <= n { + return input, "" + } + + count := 0 + end := 0 + for count < n-1 { + _, size := utf8.DecodeRuneInString(input[end:]) + end += size + count++ + } + + left = input[:end] + utf8Ellipsis + right = utf8Ellipsis + input[end:] return } diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go new file mode 100644 index 0000000000000..34112d0c6c175 --- /dev/null +++ b/modules/util/truncate_test.go @@ -0,0 +1,46 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package util + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSplitString(t *testing.T) { + type testCase struct { + input string + n int + leftSub string + ellipsis string + } + + test := func(tc []*testCase, f func(input string, n int) (left, right string)) { + for _, c := range tc { + l, r := f(c.input, c.n) + assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %s at %d, expected leftSub: %s", c.input, c.n, c.leftSub) + assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %s", c.input, c.n, c.input[len(c.leftSub):]) + } + } + + tc := []*testCase{ + {"abc123xyz", 0, "", utf8Ellipsis}, + {"abc123xyz", 1, "", utf8Ellipsis}, + {"abc123xyz", 4, "a", utf8Ellipsis}, + {"啊bc123xyz", 4, "", utf8Ellipsis}, + {"啊bc123xyz", 6, "啊", utf8Ellipsis}, + } + test(tc, SplitStringAtByteN) + + tc = []*testCase{ + {"abc123xyz", 0, "", utf8Ellipsis}, + {"abc123xyz", 1, "", utf8Ellipsis}, + {"abc123xyz", 4, "abc", utf8Ellipsis}, + {"啊bc123xyz", 4, "啊bc", utf8Ellipsis}, + {"啊bc123xyz", 6, "啊bc12", utf8Ellipsis}, + } + test(tc, SplitStringAtRuneN) +} From b4f3435b99c49e2a0c557c2d2c39c63a0513333b Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Dec 2021 18:57:28 +0800 Subject: [PATCH 2/5] more unit tests --- modules/util/truncate_test.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go index 34112d0c6c175..34cbafa1ba87d 100644 --- a/modules/util/truncate_test.go +++ b/modules/util/truncate_test.go @@ -21,8 +21,13 @@ func TestSplitString(t *testing.T) { test := func(tc []*testCase, f func(input string, n int) (left, right string)) { for _, c := range tc { l, r := f(c.input, c.n) - assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %s at %d, expected leftSub: %s", c.input, c.n, c.leftSub) - assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %s", c.input, c.n, c.input[len(c.leftSub):]) + if c.ellipsis != "" { + assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub) + assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):]) + } else { + assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub) + assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "") + } } } @@ -32,6 +37,10 @@ func TestSplitString(t *testing.T) { {"abc123xyz", 4, "a", utf8Ellipsis}, {"啊bc123xyz", 4, "", utf8Ellipsis}, {"啊bc123xyz", 6, "啊", utf8Ellipsis}, + {"啊bc", 5, "啊bc", ""}, + {"啊bc", 6, "啊bc", ""}, + {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, + {"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, } test(tc, SplitStringAtByteN) @@ -41,6 +50,10 @@ func TestSplitString(t *testing.T) { {"abc123xyz", 4, "abc", utf8Ellipsis}, {"啊bc123xyz", 4, "啊bc", utf8Ellipsis}, {"啊bc123xyz", 6, "啊bc12", utf8Ellipsis}, + {"啊bc", 3, "啊bc", ""}, + {"啊bc", 4, "啊bc", ""}, + {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, + {"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, } test(tc, SplitStringAtRuneN) } From 89ea1bded0f5bc964a21cc26c9cf24900ba6d6f0 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Dec 2021 18:59:21 +0800 Subject: [PATCH 3/5] refactor --- modules/notification/action/action.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/notification/action/action.go b/modules/notification/action/action.go index d5007fbbf3084..ae59c3639a35b 100644 --- a/modules/notification/action/action.go +++ b/modules/notification/action/action.go @@ -101,14 +101,15 @@ func (a *actionNotifier) NotifyCreateIssueComment(doer *user_model.User, repo *m IsPrivate: issue.Repo.IsPrivate, } - content, truncatedRight := util.SplitStringAtRuneN(comment.Content, 200) + truncatedContent, truncatedRight := util.SplitStringAtRuneN(comment.Content, 200) if truncatedRight != "" { // in case the content is in a Latin family language, we remove the last broken word. - if lastSpaceIdx := strings.LastIndex(content, " "); lastSpaceIdx != -1 && (len(content)-lastSpaceIdx < 15) { - content = comment.Content[:lastSpaceIdx] + "…" + lastSpaceIdx := strings.LastIndex(truncatedContent, " ") + if lastSpaceIdx != -1 && (len(truncatedContent)-lastSpaceIdx < 15) { + truncatedContent = truncatedContent[:lastSpaceIdx] + "…" } } - act.Content = fmt.Sprintf("%d|%s", issue.Index, content) + act.Content = fmt.Sprintf("%d|%s", issue.Index, truncatedContent) if issue.IsPull { act.OpType = models.ActionCommentPull From c11be887d4ba077b68d0bc2722db961f29b2ebb8 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Dec 2021 19:14:57 +0800 Subject: [PATCH 4/5] fix some edge cases --- modules/util/truncate.go | 21 ++++++++------------- modules/util/truncate_test.go | 2 ++ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/modules/util/truncate.go b/modules/util/truncate.go index 87638884331dd..38c2c0d1d6311 100644 --- a/modules/util/truncate.go +++ b/modules/util/truncate.go @@ -17,9 +17,10 @@ func SplitStringAtByteN(input string, n int) (left, right string) { } if !utf8.ValidString(input) { - left = input[:n-3] + asciiEllipsis - right = asciiEllipsis + input[n-3:] - return + if n-3 < 0 { + return input, "" + } + return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:] } end := 0 @@ -31,20 +32,16 @@ func SplitStringAtByteN(input string, n int) (left, right string) { end += size } - left = input[:end] + utf8Ellipsis - right = utf8Ellipsis + input[end:] - return + return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:] } // SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.) func SplitStringAtRuneN(input string, n int) (left, right string) { if !utf8.ValidString(input) { - if len(input) <= n { + if len(input) <= n || n-3 < 0 { return input, "" } - left = input[:n-3] + asciiEllipsis - right = asciiEllipsis + input[n-3:] - return + return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:] } if utf8.RuneCountInString(input) <= n { @@ -59,7 +56,5 @@ func SplitStringAtRuneN(input string, n int) (left, right string) { count++ } - left = input[:end] + utf8Ellipsis - right = utf8Ellipsis + input[end:] - return + return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:] } diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go index 34cbafa1ba87d..e505a6ee4aaad 100644 --- a/modules/util/truncate_test.go +++ b/modules/util/truncate_test.go @@ -41,6 +41,7 @@ func TestSplitString(t *testing.T) { {"啊bc", 6, "啊bc", ""}, {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, {"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, + {"\xef\x03", 1, "\xef\x03", ""}, } test(tc, SplitStringAtByteN) @@ -54,6 +55,7 @@ func TestSplitString(t *testing.T) { {"啊bc", 4, "啊bc", ""}, {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, {"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, + {"\xef\x03", 1, "\xef\x03", ""}, } test(tc, SplitStringAtRuneN) } From e38f67a1b0d3ebba5a87df0f7881c94ad4a599e0 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Dec 2021 21:23:20 +0800 Subject: [PATCH 5/5] use SplitStringAtByteN for comment content --- modules/notification/action/action.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/notification/action/action.go b/modules/notification/action/action.go index ae59c3639a35b..81a011fbed684 100644 --- a/modules/notification/action/action.go +++ b/modules/notification/action/action.go @@ -101,7 +101,7 @@ func (a *actionNotifier) NotifyCreateIssueComment(doer *user_model.User, repo *m IsPrivate: issue.Repo.IsPrivate, } - truncatedContent, truncatedRight := util.SplitStringAtRuneN(comment.Content, 200) + truncatedContent, truncatedRight := util.SplitStringAtByteN(comment.Content, 200) if truncatedRight != "" { // in case the content is in a Latin family language, we remove the last broken word. lastSpaceIdx := strings.LastIndex(truncatedContent, " ")