From dca05ee0a5d840eabc95a7c4c8ff762a1ccba73f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 5 Nov 2021 21:57:43 +0000 Subject: [PATCH 01/30] Add warning for BIDI characters in page renders and in diffs Fix #17514 Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 57 ++++++++++++++++ modules/charset/charset_test.go | 93 +++++++++++++++++++++++++- options/locale/locale_en-US.ini | 3 + routers/web/repo/lfs.go | 1 + routers/web/repo/view.go | 7 ++ routers/web/repo/wiki.go | 4 ++ services/gitdiff/gitdiff.go | 21 ++++++ templates/repo/diff/section_split.tmpl | 10 +-- templates/repo/settings/lfs_file.tmpl | 9 +++ templates/repo/view_file.tmpl | 9 +++ templates/repo/wiki/view.tmpl | 33 ++++++++- web_src/js/features/common-global.js | 6 ++ web_src/js/features/diff.js | 28 ++++++++ web_src/js/index.js | 3 +- web_src/less/_base.less | 4 ++ web_src/less/_review.less | 9 +++ 16 files changed, 287 insertions(+), 10 deletions(-) diff --git a/modules/charset/charset.go b/modules/charset/charset.go index ae5cf5aa1a4e..efc7d85efa14 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -23,6 +23,63 @@ import ( // UTF8BOM is the utf-8 byte-order marker var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} +// BIDIRunes are runes that are explicitly mentioned in CVE-2021-42574 +var BIDIRunes = "\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069" + +// ContainsBIDIRuneString checks some text for any bidi rune +func ContainsBIDIRuneString(text string) bool { + return strings.ContainsAny(text, BIDIRunes) +} + +// ContainsBIDIRuneBytes checks some text for any bidi rune +func ContainsBIDIRuneBytes(text []byte) bool { + return bytes.ContainsAny(text, BIDIRunes) +} + +// ContainsBIDIRuneReader checks some text for any bidi rune +func ContainsBIDIRuneReader(text io.Reader) (bool, error) { + buf := make([]byte, 4096) + readStart := 0 + var err error + var n int + for err == nil { + n, err = text.Read(buf[readStart:]) + bs := buf[:n] + i := 0 + inner: + for i < n { + r, size := utf8.DecodeRune(bs[i:]) + if r == utf8.RuneError { + // need to decide what to do here... runes can be at most 4 bytes - so... i123n + if n-i > 3 { + // this is a real broken rune + return true, fmt.Errorf("text contains bad rune: %x", bs[i]) + } + + break inner + } + if strings.ContainsRune(BIDIRunes, r) { + return true, nil + } + i += size + } + if n > 0 { + readStart = 0 + } + if i < n { + copy(buf, bs[i:n]) + readStart = n - i + } + } + if readStart > 0 { + return true, fmt.Errorf("text contains bad rune: %x", buf[0]) + } + if err == io.EOF { + return false, nil + } + return true, err +} + // ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible func ToUTF8WithFallbackReader(rd io.Reader) io.Reader { var buf = make([]byte, 2048) diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index 33f0c10a7a20..d4b01bcd15ad 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -5,11 +5,11 @@ package charset import ( + "bytes" "strings" "testing" "code.gitea.io/gitea/modules/setting" - "github.com/stretchr/testify/assert" ) @@ -272,3 +272,94 @@ func stringMustEndWith(t *testing.T, expected string, value string) { func bytesMustStartWith(t *testing.T, expected []byte, value []byte) { assert.Equal(t, expected, value[:len(expected)]) } + +var bidiTestCases = []struct { + name string + text string + want bool +}{ + { + name: "Accented characters", + text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), + want: false, + }, + { + name: "Program", + text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", + want: false, + }, + { + name: "CVE testcase", + text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", + want: true, + }, +} + +func TestContainsBIDIRuneString(t *testing.T) { + for _, tt := range bidiTestCases { + t.Run(tt.name, func(t *testing.T) { + if got := ContainsBIDIRuneString(tt.text); got != tt.want { + t.Errorf("ContainsBIDIRuneString() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestContainsBIDIRuneBytes(t *testing.T) { + for _, tt := range bidiTestCases { + t.Run(tt.name, func(t *testing.T) { + if got := ContainsBIDIRuneBytes([]byte(tt.text)); got != tt.want { + t.Errorf("ContainsBIDIRuneBytes() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestContainsBIDIRuneReader(t *testing.T) { + for _, tt := range bidiTestCases { + t.Run(tt.name, func(t *testing.T) { + got, err := ContainsBIDIRuneReader(strings.NewReader(tt.text)) + if err != nil { + t.Errorf("ContainsBIDIRuneReader() error = %v", err) + return + } + if got != tt.want { + t.Errorf("ContainsBIDIRuneReader() = %v, want %v", got, tt.want) + } + }) + } + + // now we need some specific tests with broken runes + for _, tt := range bidiTestCases { + t.Run(tt.name+"-terminal-xc2", func(t *testing.T) { + bs := []byte(tt.text) + bs = append(bs, 0xc2) + got, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) + if !tt.want { + if err == nil { + t.Errorf("ContainsBIDIRuneReader() should have errored") + return + } + } else if !got { + t.Errorf("ContainsBIDIRuneReader() should have returned true") + return + } + }) + } + + for _, tt := range bidiTestCases { + t.Run(tt.name+"-prefix-xff", func(t *testing.T) { + bs := append([]byte{0xff}, []byte(tt.text)...) + got, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) + if err == nil { + t.Errorf("ContainsBIDIRuneReader() should have errored") + return + } + if !got { + t.Errorf("ContainsBIDIRuneReader() should have returned true") + return + } + }) + } + +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index c1762799ebcf..f01afe04b45e 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -977,6 +977,8 @@ file_view_rendered = View Rendered file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. +bidi_header = `This file contains hidden Unicode characters!` +bidi_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
If your use case is intentional and legitimate, you can safely ignore this warning.
Consult documentation of your favorite text editor for how to open this file using `DOS (CP 437)` encoding instead of Unicode, to reveal hidden characters.` file_copy_permalink = Copy Permalink video_not_supported_in_browser = Your browser does not support the HTML5 'video' tag. audio_not_supported_in_browser = Your browser does not support the HTML5 'audio' tag. @@ -2057,6 +2059,7 @@ diff.protected = Protected diff.image.side_by_side = Side by Side diff.image.swipe = Swipe diff.image.overlay = Overlay +diff.has_bidi = This line has hidden Unicode characters releases.desc = Track project versions and downloads. release.releases = Releases diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 5e24cfa3c0a8..ea859cf6806b 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -316,6 +316,7 @@ func LFSFileGet(ctx *context.Context) { output.WriteString(fmt.Sprintf(`
  • %s
  • `, index+1, index+1, line)) } ctx.Data["FileContent"] = gotemplate.HTML(output.String()) + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(output.String()) output.Reset() for i := 0; i < len(lines); i++ { diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 90be631c7349..68fc7ca40534 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -332,11 +332,13 @@ func renderDirectory(ctx *context.Context, treeLink string) { if err != nil { log.Error("Render failed: %v then fallback", err) bs, _ := io.ReadAll(rd) + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(bs) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(bs)), "\n", `
    `, ) } else { ctx.Data["FileContent"] = result.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } } else { ctx.Data["IsRenderedHTML"] = true @@ -344,6 +346,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { if err != nil { log.Error("ReadAll failed: %v", err) } + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, ) @@ -490,9 +493,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } ctx.Data["FileContent"] = result.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } else if readmeExist { buf, _ := io.ReadAll(rd) ctx.Data["IsRenderedHTML"] = true + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, ) @@ -501,6 +506,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st lineNums := linesBytesCount(buf) ctx.Data["NumLines"] = strconv.Itoa(lineNums) ctx.Data["NumLinesSet"] = true + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = highlight.File(lineNums, blob.Name(), buf) } if !isLFSFile { @@ -550,6 +556,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } ctx.Data["FileContent"] = result.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index a571c46fd73e..de51e93f7381 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -16,6 +16,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" @@ -232,6 +233,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["content"] = buf.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(sidebarContent), &buf); err != nil { @@ -243,6 +245,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { } ctx.Data["sidebarPresent"] = sidebarContent != nil ctx.Data["sidebarContent"] = buf.String() + ctx.Data["sidebarHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(footerContent), &buf); err != nil { @@ -254,6 +257,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { } ctx.Data["footerPresent"] = footerContent != nil ctx.Data["footerContent"] = buf.String() + ctx.Data["footerHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) // get commit count - wiki revisions commitsCount, _ := wikiRepo.FileCommitsCount("master", pageFilename) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index f843bc4dcf9d..df99c96408c6 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -117,6 +117,27 @@ func (d *DiffLine) GetCommentSide() string { return d.Comments[0].DiffSide() } +// HasBIDI returns true if there is a BIDI rune in this line +func (d *DiffLine) HasBIDI() bool { + return charset.ContainsBIDIRuneString(d.Content) +} + +// LeftHasBIDI returns true if there is a BIDI rune in this line +func (d *DiffLine) LeftHasBIDI() bool { + if d.LeftIdx > 0 { + return charset.ContainsBIDIRuneString(d.Content) + } + return false +} + +// RightHasBIDI returns true if there is a BIDI rune in this line +func (d *DiffLine) RightHasBIDI() bool { + if d.RightIdx > 0 { + return charset.ContainsBIDIRuneString(d.Content) + } + return false +} + // GetLineTypeMarker returns the line type marker func (d *DiffLine) GetLineTypeMarker() string { if strings.IndexByte(" +-", d.Content[0]) > -1 { diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index aed6d784b307..ebc88cffe19a 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -22,22 +22,22 @@ {{end}} - {{$section.GetComputedInlineDiffFor $line}} + {{if $line.LeftHasBIDI}}️⚠{{end}}{{$section.GetComputedInlineDiffFor $line}} {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftHasBIDI}}️⚠{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{if $match.RightIdx}}{{end}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $match.RightIdx}}{{$section.GetComputedInlineDiffFor $match}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $match.RightHasBIDI}}️⚠{{end}}{{if $match.RightIdx}}{{$section.GetComputedInlineDiffFor $match}}{{end}} {{else}} {{if $line.LeftIdx}}{{end}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftHasBIDI}}️⚠{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{if $line.RightIdx}}{{end}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{svg "octicon-plus"}}{{end}}{{if $line.RightIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{svg "octicon-plus"}}{{end}}{{if $line.RightHasBIDI}}️⚠{{end}}{{if $line.RightIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{end}} {{if and (eq .GetType 3) $hasmatch}} diff --git a/templates/repo/settings/lfs_file.tmpl b/templates/repo/settings/lfs_file.tmpl index 478c034e1107..5719571da279 100644 --- a/templates/repo/settings/lfs_file.tmpl +++ b/templates/repo/settings/lfs_file.tmpl @@ -12,6 +12,15 @@
    + {{if .HasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}}
    {{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 0c8990a4f5c8..0d9aa99b61f5 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -64,6 +64,15 @@ {{end}}
    + {{if .HasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}}
    {{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} diff --git a/templates/repo/wiki/view.tmpl b/templates/repo/wiki/view.tmpl index a393fb20a18f..a2d6eb7b5e7b 100644 --- a/templates/repo/wiki/view.tmpl +++ b/templates/repo/wiki/view.tmpl @@ -62,6 +62,15 @@ {{end}}
    + {{if .HasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}} {{.content | Str2html}}
    {{if .sidebarPresent}} @@ -70,6 +79,15 @@ {{if and .CanWriteWiki (not .Repository.IsMirror)}} {{svg "octicon-pencil"}} {{end}} + {{if .sidebarHasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}} {{.sidebarContent | Str2html}}
    @@ -77,9 +95,18 @@
    {{if .footerPresent}}
    - {{if and .CanWriteWiki (not .Repository.IsMirror)}} - {{svg "octicon-pencil"}} - {{end}} + {{if and .CanWriteWiki (not .Repository.IsMirror)}} + {{svg "octicon-pencil"}} + {{end}} + {{if .footerHasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}} {{.footerContent | Str2html}}
    {{end}} diff --git a/web_src/js/features/common-global.js b/web_src/js/features/common-global.js index 04d44d8142ab..eb5216bfdc7b 100644 --- a/web_src/js/features/common-global.js +++ b/web_src/js/features/common-global.js @@ -82,6 +82,12 @@ export function initGlobalCommon() { } // Semantic UI modules. + $('.message .close').on('click', function() { + $(this) + .closest('.message') + .hide(); + }); + $('.dropdown:not(.custom)').dropdown({ fullTextSearch: 'exact' }); diff --git a/web_src/js/features/diff.js b/web_src/js/features/diff.js index ef0aaceeeb50..75797733b4ea 100644 --- a/web_src/js/features/diff.js +++ b/web_src/js/features/diff.js @@ -22,3 +22,31 @@ export function initDiffShowMore() { }); }); } + +export function initShowBidi() { + $('a.code-has-bidi').on('click', (e) => { + e.preventDefault(); + + $('a.code-has-bidi').each((_, target) => { + const inner = $(target).siblings().closest('.code-inner'); + const escaped = inner.data('escaped'); + let original = inner.data('original'); + + if (escaped) { + inner.html(original); + inner.data('escaped', ''); + } else { + if (!original) { + original = $(inner).html(); + inner.data('original', original); + } + + inner.html(original.replaceAll(/([\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069])/g, (match) => { + const value = match.charCodeAt(0).toString(16); + return `&#${value};`; + })); + inner.data('escaped', 'escaped'); + } + }); + }); +} diff --git a/web_src/js/index.js b/web_src/js/index.js index a3bd35175e5a..ecec6683a460 100644 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -20,7 +20,7 @@ import {initNotificationCount, initNotificationsTable} from './features/notifica import {initLastCommitLoader} from './features/lastcommitloader.js'; import {initIssueContentHistory} from './features/issue-content-history.js'; import {initStopwatch} from './features/stopwatch.js'; -import {initDiffShowMore} from './features/diff.js'; +import {initDiffShowMore, initShowBidi} from './features/diff.js'; import {initCommentContent, initMarkupContent} from './markup/content.js'; import {initUserAuthLinkAccountView, initUserAuthOauth2} from './features/user-auth.js'; @@ -135,6 +135,7 @@ $(document).ready(async () => { initRepoReleaseEditor(); initRepoRelease(); initDiffShowMore(); + initShowBidi(); initIssueContentHistory(); initAdminUserListSearchForm(); initGlobalCopyToClipboardListener(); diff --git a/web_src/less/_base.less b/web_src/less/_base.less index c09f3a2bdd8a..01b7c1770292 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -2118,3 +2118,7 @@ table th[data-sortt-desc] { height: 15px; } } + +.escaped-char { + background-color: red; +} diff --git a/web_src/less/_review.less b/web_src/less/_review.less index 12bd6a608a8b..eaa3ca31f75c 100644 --- a/web_src/less/_review.less +++ b/web_src/less/_review.less @@ -20,6 +20,15 @@ opacity: 1; } +.diff-file-box .code-has-bidi { + opacity: 1; + padding-left: 0 !important; + padding-right: 0 !important; + padding-top: 2px; + padding-bottom: 2px; + font-family: var(--fonts-emoji); +} + .repository .diff-file-box .code-diff .add-comment-left, .repository .diff-file-box .code-diff .add-comment-right, .repository .diff-file-box .code-diff .add-code-comment .add-comment-left, From 449cb26312d4bfaaf0e15909a246800bc196e03e Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 5 Nov 2021 22:06:44 +0000 Subject: [PATCH 02/30] as per review Signed-off-by: Andrew Thornton --- modules/charset/charset_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index d4b01bcd15ad..e73e4f4f1b3b 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -10,6 +10,7 @@ import ( "testing" "code.gitea.io/gitea/modules/setting" + "github.com/stretchr/testify/assert" ) From 40b86286562156c672b97b4a08a0af6ee172fcda Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 6 Nov 2021 22:13:47 +0000 Subject: [PATCH 03/30] Adjust to only put the warning on BIDI lines without RTL chars Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 63 +++++++++++++++++++-------- modules/charset/charset_test.go | 54 ++++++++++++++++------- routers/web/repo/blame.go | 3 ++ routers/web/repo/lfs.go | 2 +- routers/web/repo/view.go | 14 +++--- routers/web/repo/wiki.go | 6 +-- services/gitdiff/gitdiff.go | 7 +-- templates/repo/blame.tmpl | 2 +- templates/repo/settings/lfs_file.tmpl | 2 +- templates/repo/view_file.tmpl | 2 +- templates/repo/wiki/view.tmpl | 6 +-- web_src/less/_review.less | 3 +- 12 files changed, 110 insertions(+), 54 deletions(-) diff --git a/modules/charset/charset.go b/modules/charset/charset.go index efc7d85efa14..cf8f40e28535 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -18,6 +18,7 @@ import ( "github.com/gogs/chardet" "golang.org/x/net/html/charset" "golang.org/x/text/transform" + "golang.org/x/text/unicode/bidi" ) // UTF8BOM is the utf-8 byte-order marker @@ -26,22 +27,31 @@ var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} // BIDIRunes are runes that are explicitly mentioned in CVE-2021-42574 var BIDIRunes = "\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069" -// ContainsBIDIRuneString checks some text for any bidi rune -func ContainsBIDIRuneString(text string) bool { - return strings.ContainsAny(text, BIDIRunes) +// ContainsBIDIRuneString checks for potential bidi misuse with lines +// containing bidi characters but no RTL characters +func ContainsBIDIRuneString(text string) (bad, any bool) { + bad, any, _ = ContainsBIDIRuneReader(strings.NewReader(text)) + return } -// ContainsBIDIRuneBytes checks some text for any bidi rune -func ContainsBIDIRuneBytes(text []byte) bool { - return bytes.ContainsAny(text, BIDIRunes) +// ContainsBIDIRuneBytes checks for potential bidi misuse with lines +// containing bidi characters but no RTL characters +func ContainsBIDIRuneBytes(text []byte) (bad, any bool) { + bad, any, _ = ContainsBIDIRuneReader(bytes.NewReader(text)) + return } -// ContainsBIDIRuneReader checks some text for any bidi rune -func ContainsBIDIRuneReader(text io.Reader) (bool, error) { +// ContainsBIDIRuneReader checks for potential bidi misuse with lines +// containing bidi characters but no RTL characters +func ContainsBIDIRuneReader(text io.Reader) (bad, any bool, err error) { buf := make([]byte, 4096) readStart := 0 - var err error var n int + anyBIDI := false + lineHasBIDI := false + lineHasRTLScript := false + lineHasLTRScript := false + for err == nil { n, err = text.Read(buf[readStart:]) bs := buf[:n] @@ -50,16 +60,32 @@ func ContainsBIDIRuneReader(text io.Reader) (bool, error) { for i < n { r, size := utf8.DecodeRune(bs[i:]) if r == utf8.RuneError { - // need to decide what to do here... runes can be at most 4 bytes - so... i123n + // need to decide what to do here... runes can be at most 4 bytes - so... if n-i > 3 { // this is a real broken rune - return true, fmt.Errorf("text contains bad rune: %x", bs[i]) + return true, true, fmt.Errorf("text contains bad rune: %x", bs[i]) } - break inner } - if strings.ContainsRune(BIDIRunes, r) { - return true, nil + switch { + case r == '\n': + if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { + return true, true, nil + } + lineHasBIDI = false + lineHasRTLScript = false + lineHasLTRScript = false + case strings.ContainsRune(BIDIRunes, r): + lineHasBIDI = true + anyBIDI = true + default: + p, _ := bidi.Lookup(bs[i : i+size]) + c := p.Class() + if c == bidi.R || c == bidi.AL { + lineHasRTLScript = true + } else if c == bidi.L { + lineHasLTRScript = true + } } i += size } @@ -72,12 +98,15 @@ func ContainsBIDIRuneReader(text io.Reader) (bool, error) { } } if readStart > 0 { - return true, fmt.Errorf("text contains bad rune: %x", buf[0]) + return true, true, fmt.Errorf("text contains bad rune: %x", buf[0]) } if err == io.EOF { - return false, nil + if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { + return true, true, nil + } + return false, anyBIDI, nil } - return true, err + return true, true, err } // ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index e73e4f4f1b3b..3b6e16d4c414 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -277,30 +277,52 @@ func bytesMustStartWith(t *testing.T, expected []byte, value []byte) { var bidiTestCases = []struct { name string text string - want bool + bad bool + any bool }{ { name: "Accented characters", text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), - want: false, }, { name: "Program", text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", - want: false, }, { name: "CVE testcase", text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", - want: true, + bad: true, + any: true, + }, + { + name: "Mixed testcase", + text: `Many computer programs fail to display bidirectional text correctly. +For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), +then resh (ר), and finally heh (ה) (which should appear leftmost).`, + }, + { + name: "Mixed testcase", + text: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, + any: true, + }, + { + name: "Mixed testcase with fail", + text: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + + "\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n", + bad: true, + any: true, }, } func TestContainsBIDIRuneString(t *testing.T) { for _, tt := range bidiTestCases { t.Run(tt.name, func(t *testing.T) { - if got := ContainsBIDIRuneString(tt.text); got != tt.want { - t.Errorf("ContainsBIDIRuneString() = %v, want %v", got, tt.want) + if bad, any := ContainsBIDIRuneString(tt.text); bad != tt.bad || any != tt.any { + t.Errorf("ContainsBIDIRuneString() = %v, %v, want %v, %v", bad, any, tt.bad, tt.any) } }) } @@ -309,8 +331,8 @@ func TestContainsBIDIRuneString(t *testing.T) { func TestContainsBIDIRuneBytes(t *testing.T) { for _, tt := range bidiTestCases { t.Run(tt.name, func(t *testing.T) { - if got := ContainsBIDIRuneBytes([]byte(tt.text)); got != tt.want { - t.Errorf("ContainsBIDIRuneBytes() = %v, want %v", got, tt.want) + if bad, any := ContainsBIDIRuneBytes([]byte(tt.text)); bad != tt.bad || any != tt.any { + t.Errorf("ContainsBIDIRuneBytes() = %v, %v, want %v, %v", bad, any, tt.bad, tt.any) } }) } @@ -319,13 +341,13 @@ func TestContainsBIDIRuneBytes(t *testing.T) { func TestContainsBIDIRuneReader(t *testing.T) { for _, tt := range bidiTestCases { t.Run(tt.name, func(t *testing.T) { - got, err := ContainsBIDIRuneReader(strings.NewReader(tt.text)) + bad, any, err := ContainsBIDIRuneReader(strings.NewReader(tt.text)) if err != nil { t.Errorf("ContainsBIDIRuneReader() error = %v", err) return } - if got != tt.want { - t.Errorf("ContainsBIDIRuneReader() = %v, want %v", got, tt.want) + if bad != tt.bad || any != tt.any { + t.Errorf("ContainsBIDIRuneReader() = %v, %v, want %v, %v", bad, any, tt.bad, tt.any) } }) } @@ -335,13 +357,13 @@ func TestContainsBIDIRuneReader(t *testing.T) { t.Run(tt.name+"-terminal-xc2", func(t *testing.T) { bs := []byte(tt.text) bs = append(bs, 0xc2) - got, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) - if !tt.want { + bad, _, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) + if !tt.bad { if err == nil { t.Errorf("ContainsBIDIRuneReader() should have errored") return } - } else if !got { + } else if !bad { t.Errorf("ContainsBIDIRuneReader() should have returned true") return } @@ -351,12 +373,12 @@ func TestContainsBIDIRuneReader(t *testing.T) { for _, tt := range bidiTestCases { t.Run(tt.name+"-prefix-xff", func(t *testing.T) { bs := append([]byte{0xff}, []byte(tt.text)...) - got, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) + bad, _, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) if err == nil { t.Errorf("ContainsBIDIRuneReader() should have errored") return } - if !got { + if !bad { t.Errorf("ContainsBIDIRuneReader() should have returned true") return } diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index 3632d1846eae..ef60abb4b0e5 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/highlight" @@ -35,6 +36,7 @@ type blameRow struct { CommitMessage string CommitSince gotemplate.HTML Code gotemplate.HTML + HasBIDI bool } // RefBlame render blame page @@ -249,6 +251,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m line = highlight.Code(fileName, line) br.Code = gotemplate.HTML(line) + _, br.HasBIDI = charset.ContainsBIDIRuneString(line) rows = append(rows, br) } } diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index ea859cf6806b..5d6a98ed554d 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -316,7 +316,7 @@ func LFSFileGet(ctx *context.Context) { output.WriteString(fmt.Sprintf(`
  • %s
  • `, index+1, index+1, line)) } ctx.Data["FileContent"] = gotemplate.HTML(output.String()) - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(output.String()) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(output.String()) output.Reset() for i := 0; i < len(lines); i++ { diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 68fc7ca40534..a235cc6d97e7 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -332,13 +332,13 @@ func renderDirectory(ctx *context.Context, treeLink string) { if err != nil { log.Error("Render failed: %v then fallback", err) bs, _ := io.ReadAll(rd) - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(bs) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(bs) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(bs)), "\n", `
    `, ) } else { ctx.Data["FileContent"] = result.String() - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } } else { ctx.Data["IsRenderedHTML"] = true @@ -346,7 +346,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { if err != nil { log.Error("ReadAll failed: %v", err) } - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, ) @@ -493,11 +493,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } ctx.Data["FileContent"] = result.String() - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } else if readmeExist { buf, _ := io.ReadAll(rd) ctx.Data["IsRenderedHTML"] = true - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, ) @@ -506,7 +506,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st lineNums := linesBytesCount(buf) ctx.Data["NumLines"] = strconv.Itoa(lineNums) ctx.Data["NumLinesSet"] = true - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = highlight.File(lineNums, blob.Name(), buf) } if !isLFSFile { @@ -556,7 +556,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } ctx.Data["FileContent"] = result.String() - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index de51e93f7381..c606d76d15c8 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -233,7 +233,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["content"] = buf.String() - ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) + ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(sidebarContent), &buf); err != nil { @@ -245,7 +245,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { } ctx.Data["sidebarPresent"] = sidebarContent != nil ctx.Data["sidebarContent"] = buf.String() - ctx.Data["sidebarHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) + ctx.Data["sidebarBadBIDI"], ctx.Data["sidebarHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(footerContent), &buf); err != nil { @@ -257,7 +257,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { } ctx.Data["footerPresent"] = footerContent != nil ctx.Data["footerContent"] = buf.String() - ctx.Data["footerHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) + ctx.Data["footerBadBIDI"], ctx.Data["footerHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) // get commit count - wiki revisions commitsCount, _ := wikiRepo.FileCommitsCount("master", pageFilename) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index df99c96408c6..6e92da0d7de8 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -119,13 +119,14 @@ func (d *DiffLine) GetCommentSide() string { // HasBIDI returns true if there is a BIDI rune in this line func (d *DiffLine) HasBIDI() bool { - return charset.ContainsBIDIRuneString(d.Content) + _, any := charset.ContainsBIDIRuneString(d.Content) + return any } // LeftHasBIDI returns true if there is a BIDI rune in this line func (d *DiffLine) LeftHasBIDI() bool { if d.LeftIdx > 0 { - return charset.ContainsBIDIRuneString(d.Content) + return d.HasBIDI() } return false } @@ -133,7 +134,7 @@ func (d *DiffLine) LeftHasBIDI() bool { // RightHasBIDI returns true if there is a BIDI rune in this line func (d *DiffLine) RightHasBIDI() bool { if d.RightIdx > 0 { - return charset.ContainsBIDIRuneString(d.Content) + return d.HasBIDI() } return false } diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index c7c497088a9e..d077dc2f34c6 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -53,7 +53,7 @@ - {{$row.Code}} + {{if $row.HasBIDI}}️⚠{{end}}{{$row.Code}} {{end}} diff --git a/templates/repo/settings/lfs_file.tmpl b/templates/repo/settings/lfs_file.tmpl index 5719571da279..1e92cac3d961 100644 --- a/templates/repo/settings/lfs_file.tmpl +++ b/templates/repo/settings/lfs_file.tmpl @@ -12,7 +12,7 @@
    - {{if .HasBIDI}} + {{if .BadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 0d9aa99b61f5..c05d51cc4992 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -64,7 +64,7 @@ {{end}}
    - {{if .HasBIDI}} + {{if .BadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    diff --git a/templates/repo/wiki/view.tmpl b/templates/repo/wiki/view.tmpl index a2d6eb7b5e7b..3ba93f8ad346 100644 --- a/templates/repo/wiki/view.tmpl +++ b/templates/repo/wiki/view.tmpl @@ -62,7 +62,7 @@ {{end}}
    - {{if .HasBIDI}} + {{if .BadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    @@ -79,7 +79,7 @@ {{if and .CanWriteWiki (not .Repository.IsMirror)}} {{svg "octicon-pencil"}} {{end}} - {{if .sidebarHasBIDI}} + {{if .sidebarBadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    @@ -98,7 +98,7 @@ {{if and .CanWriteWiki (not .Repository.IsMirror)}} {{svg "octicon-pencil"}} {{end}} - {{if .footerHasBIDI}} + {{if .footerBadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    diff --git a/web_src/less/_review.less b/web_src/less/_review.less index eaa3ca31f75c..71d6ed985acf 100644 --- a/web_src/less/_review.less +++ b/web_src/less/_review.less @@ -20,7 +20,8 @@ opacity: 1; } -.diff-file-box .code-has-bidi { +.diff-file-box .code-has-bidi, +.blame-code .code-has-bidi { opacity: 1; padding-left: 0 !important; padding-right: 0 !important; From 3a63d9ddad7b5423e863247ea053de3628c0c4c5 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 7 Nov 2021 19:19:29 +0000 Subject: [PATCH 04/30] Another attempt. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here we detect if there are examples of bad bidi behaviour and render an error for those, but otherwise we will attempt to detect all zero-widtah characters, combining characters, unusual spaces and control characters and will render these with an escape and unescaped form. (In particular this allows for the detection of pages that contain á and á.) Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 190 ++++++++++++--- modules/charset/charset_test.go | 216 ++++++++++++------ options/locale/locale_en-US.ini | 13 +- routers/web/repo/blame.go | 3 +- routers/web/repo/lfs.go | 6 +- routers/web/repo/view.go | 42 ++-- routers/web/repo/wiki.go | 10 +- services/gitdiff/gitdiff.go | 39 +--- templates/repo/blame.tmpl | 6 +- templates/repo/diff/box.tmpl | 6 +- templates/repo/diff/section_split.tmpl | 10 +- templates/repo/editor/diff_preview.tmpl | 2 +- .../repo/issue/view_content/comments.tmpl | 2 +- templates/repo/settings/lfs_file.tmpl | 21 +- templates/repo/view_file.tmpl | 22 +- templates/repo/wiki/view.tmpl | 48 +++- web_src/js/features/diff.js | 11 +- web_src/js/features/repo-legacy.js | 29 +++ web_src/js/index.js | 4 +- web_src/less/_repository.less | 18 ++ web_src/less/_review.less | 4 +- 21 files changed, 499 insertions(+), 203 deletions(-) diff --git a/modules/charset/charset.go b/modules/charset/charset.go index cf8f40e28535..d06b096bbaad 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "strings" + "unicode" "unicode/utf8" "code.gitea.io/gitea/modules/log" @@ -24,89 +25,202 @@ import ( // UTF8BOM is the utf-8 byte-order marker var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} -// BIDIRunes are runes that are explicitly mentioned in CVE-2021-42574 -var BIDIRunes = "\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069" +type EscapeStatus struct { + Escaped bool + HasError bool + HasBadRunes bool + HasControls bool + HasSpaces bool + HasMarks bool + HasBIDI bool + BadBIDI bool + HasRTLScript bool + HasLTRScript bool +} -// ContainsBIDIRuneString checks for potential bidi misuse with lines -// containing bidi characters but no RTL characters -func ContainsBIDIRuneString(text string) (bad, any bool) { - bad, any, _ = ContainsBIDIRuneReader(strings.NewReader(text)) - return +func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { + status.Escaped = status.Escaped || other.Escaped + status.HasError = status.HasError || other.HasError + status.HasBadRunes = status.HasBadRunes || other.HasBadRunes + status.HasControls = status.HasControls || other.HasControls + status.HasSpaces = status.HasSpaces || other.HasSpaces + status.HasMarks = status.HasMarks || other.HasMarks + status.HasBIDI = status.HasBIDI || other.HasBIDI + status.BadBIDI = status.BadBIDI || other.BadBIDI + status.HasRTLScript = status.HasRTLScript || other.HasRTLScript + status.HasLTRScript = status.HasLTRScript || other.HasLTRScript + return status } -// ContainsBIDIRuneBytes checks for potential bidi misuse with lines -// containing bidi characters but no RTL characters -func ContainsBIDIRuneBytes(text []byte) (bad, any bool) { - bad, any, _ = ContainsBIDIRuneReader(bytes.NewReader(text)) - return +func EscapeControlString(text string) (EscapeStatus, string) { + sb := &strings.Builder{} + escaped, _ := EscapeControlReader(strings.NewReader(text), sb) + return escaped, sb.String() } -// ContainsBIDIRuneReader checks for potential bidi misuse with lines -// containing bidi characters but no RTL characters -func ContainsBIDIRuneReader(text io.Reader) (bad, any bool, err error) { +func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { + buf := &bytes.Buffer{} + escaped, _ := EscapeControlReader(bytes.NewReader(text), buf) + return escaped, buf.Bytes() +} + +func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { buf := make([]byte, 4096) readStart := 0 var n int - anyBIDI := false + var writePos int + lineHasBIDI := false lineHasRTLScript := false lineHasLTRScript := false +readingloop: for err == nil { n, err = text.Read(buf[readStart:]) - bs := buf[:n] + bs := buf[:n+readStart] i := 0 - inner: - for i < n { + + for i < len(bs) { r, size := utf8.DecodeRune(bs[i:]) - if r == utf8.RuneError { - // need to decide what to do here... runes can be at most 4 bytes - so... - if n-i > 3 { - // this is a real broken rune - return true, true, fmt.Errorf("text contains bad rune: %x", bs[i]) - } - break inner - } + // Now handle the codepoints switch { + case r == utf8.RuneError: + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + writePos = i + } + // runes can be at most 4 bytes - so... + if len(bs)-i <= 3 { + // if not request more data + copy(buf, bs[i:]) + readStart = n - i + writePos = 0 + continue readingloop + } + // this is a real broken rune + escaped.HasBadRunes = true + escaped.Escaped = true + if _, err = fmt.Fprintf(output, `<%X>`, bs[i:i+size]); err != nil { + escaped.HasError = true + return + } + writePos += size case r == '\n': if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - return true, true, nil + escaped.BadBIDI = true } lineHasBIDI = false lineHasRTLScript = false lineHasLTRScript = false - case strings.ContainsRune(BIDIRunes, r): + + case bytes.Contains([]byte("\r\t "), buf[i:i+size]): + // These are acceptable control characters and space characters + case unicode.IsSpace(r): + escaped.HasSpaces = true + escaped.Escaped = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size + case unicode.Is(unicode.Bidi_Control, r): + escaped.Escaped = true + escaped.HasBIDI = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } lineHasBIDI = true - anyBIDI = true + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size + case unicode.Is(unicode.C, r): + escaped.Escaped = true + escaped.HasControls = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size + case unicode.Is(unicode.M, r): + escaped.Escaped = true + escaped.HasMarks = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size default: p, _ := bidi.Lookup(bs[i : i+size]) c := p.Class() if c == bidi.R || c == bidi.AL { lineHasRTLScript = true + escaped.HasRTLScript = true } else if c == bidi.L { lineHasLTRScript = true + escaped.HasLTRScript = true } } i += size } if n > 0 { + // we read something... + // write everything unwritten + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + + // reset the starting positions for the next read readStart = 0 - } - if i < n { - copy(buf, bs[i:n]) - readStart = n - i + writePos = 0 } } if readStart > 0 { - return true, true, fmt.Errorf("text contains bad rune: %x", buf[0]) + // this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round + escaped.Escaped = true + escaped.HasBadRunes = true + if _, err = fmt.Fprintf(output, `<%X>`, buf[:readStart]); err != nil { + escaped.HasError = true + return + } } if err == io.EOF { if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - return true, true, nil + escaped.BadBIDI = true } - return false, anyBIDI, nil + err = nil + return } - return true, true, err + escaped.HasError = true + return } // ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index 3b6e16d4c414..f0bcce8b0ad7 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -5,7 +5,7 @@ package charset import ( - "bytes" + "reflect" "strings" "testing" @@ -274,115 +274,193 @@ func bytesMustStartWith(t *testing.T, expected []byte, value []byte) { assert.Equal(t, expected, value[:len(expected)]) } -var bidiTestCases = []struct { - name string - text string - bad bool - any bool -}{ +type escapeControlTest struct { + name string + text string + status EscapeStatus + result string +} + +var escapeControlTests = []escapeControlTest{ + { + name: "", + }, + { + name: "single line western", + text: "single line western", + result: "single line western", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "multi line western", + text: "single line western\nmulti line western\n", + result: "single line western\nmulti line western\n", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "multi line western non-breaking space", + text: "single line western\nmulti line western\n", + result: `single line western` + "\n" + `multi line western` + "\n", + status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true}, + }, { - name: "Accented characters", - text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), + name: "mixed scripts: western + japanese", + text: "日属秘ぞしちゅ。Then some western.", + result: "日属秘ぞしちゅ。Then some western.", + status: EscapeStatus{HasLTRScript: true}, }, { - name: "Program", - text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", + name: "japanese", + text: "日属秘ぞしちゅ。", + result: "日属秘ぞしちゅ。", + status: EscapeStatus{HasLTRScript: true}, }, { - name: "CVE testcase", - text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", - bad: true, - any: true, + name: "hebrew", + text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", + result: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", + status: EscapeStatus{HasRTLScript: true}, }, { - name: "Mixed testcase", + name: "more hebrew", + text: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. + + המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. + + בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, + result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. + + המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. + + בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, + status: EscapeStatus{HasRTLScript: true}, + }, + { + name: "Mixed RTL+LTR", text: `Many computer programs fail to display bidirectional text correctly. For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, + result: `Many computer programs fail to display bidirectional text correctly. +For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), +then resh (ר), and finally heh (ה) (which should appear leftmost).`, + status: EscapeStatus{ + HasRTLScript: true, + HasLTRScript: true, + }, }, { - name: "Mixed testcase", + name: "Mixed RTL+LTR+BIDI", text: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, - any: true, + result: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, + status: EscapeStatus{ + Escaped: true, + HasBIDI: true, + HasRTLScript: true, + HasLTRScript: true, + }, + }, + { + name: "Accented characters", + text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), + result: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "Program", + text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", + result: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "CVE testcase", + text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", + result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, + status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true}, }, { name: "Mixed testcase with fail", text: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + "\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n", - bad: true, - any: true, + result: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + + "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", + status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, }, } -func TestContainsBIDIRuneString(t *testing.T) { - for _, tt := range bidiTestCases { +func TestEscapeControlString(t *testing.T) { + for _, tt := range escapeControlTests { t.Run(tt.name, func(t *testing.T) { - if bad, any := ContainsBIDIRuneString(tt.text); bad != tt.bad || any != tt.any { - t.Errorf("ContainsBIDIRuneString() = %v, %v, want %v, %v", bad, any, tt.bad, tt.any) + status, result := EscapeControlString(tt.text) + if !reflect.DeepEqual(status, tt.status) { + t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) + } + if result != tt.result { + t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result) } }) } } -func TestContainsBIDIRuneBytes(t *testing.T) { - for _, tt := range bidiTestCases { +func TestEscapeControlBytes(t *testing.T) { + for _, tt := range escapeControlTests { t.Run(tt.name, func(t *testing.T) { - if bad, any := ContainsBIDIRuneBytes([]byte(tt.text)); bad != tt.bad || any != tt.any { - t.Errorf("ContainsBIDIRuneBytes() = %v, %v, want %v, %v", bad, any, tt.bad, tt.any) + status, result := EscapeControlBytes([]byte(tt.text)) + if !reflect.DeepEqual(status, tt.status) { + t.Errorf("EscapeControlBytes() status = %v, wanted= %v", status, tt.status) + } + if string(result) != tt.result { + t.Errorf("EscapeControlBytes()\nresult= %v,\nwanted= %v", result, tt.result) } }) } } -func TestContainsBIDIRuneReader(t *testing.T) { - for _, tt := range bidiTestCases { - t.Run(tt.name, func(t *testing.T) { - bad, any, err := ContainsBIDIRuneReader(strings.NewReader(tt.text)) - if err != nil { - t.Errorf("ContainsBIDIRuneReader() error = %v", err) - return - } - if bad != tt.bad || any != tt.any { - t.Errorf("ContainsBIDIRuneReader() = %v, %v, want %v, %v", bad, any, tt.bad, tt.any) - } - }) +func TestEscapeControlReader(t *testing.T) { + // lets add some control characters to the tests + tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) + copy(tests, escapeControlTests) + for _, test := range escapeControlTests { + test.name += " (+Control)" + test.text = "\u001E" + test.text + test.result = `` + "\u001e" + `` + test.result + test.status.Escaped = true + test.status.HasControls = true + tests = append(tests, test) } - // now we need some specific tests with broken runes - for _, tt := range bidiTestCases { - t.Run(tt.name+"-terminal-xc2", func(t *testing.T) { - bs := []byte(tt.text) - bs = append(bs, 0xc2) - bad, _, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) - if !tt.bad { - if err == nil { - t.Errorf("ContainsBIDIRuneReader() should have errored") - return - } - } else if !bad { - t.Errorf("ContainsBIDIRuneReader() should have returned true") - return - } - }) + for _, test := range escapeControlTests { + test.name += " (+Mark)" + test.text = "\u0300" + test.text + test.result = `` + "\u0300" + `` + test.result + test.status.Escaped = true + test.status.HasMarks = true + tests = append(tests, test) } - for _, tt := range bidiTestCases { - t.Run(tt.name+"-prefix-xff", func(t *testing.T) { - bs := append([]byte{0xff}, []byte(tt.text)...) - bad, _, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) - if err == nil { - t.Errorf("ContainsBIDIRuneReader() should have errored") - return + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input := strings.NewReader(tt.text) + output := &strings.Builder{} + status, err := EscapeControlReader(input, output) + result := output.String() + if err != nil { + t.Errorf("EscapeControlReader(): err = %v", err) } - if !bad { - t.Errorf("ContainsBIDIRuneReader() should have returned true") - return + + if !reflect.DeepEqual(status, tt.status) { + t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status) + } + if result != tt.result { + t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result) } }) } - } diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index f01afe04b45e..87f1ed54bb41 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -977,8 +977,15 @@ file_view_rendered = View Rendered file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. -bidi_header = `This file contains hidden Unicode characters!` -bidi_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Consult documentation of your favorite text editor for how to open this file using `DOS (CP 437)` encoding instead of Unicode, to reveal hidden characters.` +bidi_bad_header = `This file contains unexpected BiDi Unicode characters!` +bidi_bad_description = `This file contains unexpected BiDi Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Click escape to reveal hidden characters.` +bidi_bad_description_escaped = `This file contains unexpected BiDi Unicode characters.
    Hidden unicode characters are escaped below.
    Click unescape to show how they render.` +unicode_header = `This file contains hidden Unicode characters!` +unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Click escape to reveal hidden characters.` +unicode_description_escaped = `This file contains hidden Unicode characters.
    Hidden unicode characters are escaped below.
    Click unescape to show how they render.` + +escape_control_characters = Escape +unescape_control_characters = Unescape file_copy_permalink = Copy Permalink video_not_supported_in_browser = Your browser does not support the HTML5 'video' tag. audio_not_supported_in_browser = Your browser does not support the HTML5 'audio' tag. @@ -2059,7 +2066,7 @@ diff.protected = Protected diff.image.side_by_side = Side by Side diff.image.swipe = Swipe diff.image.overlay = Overlay -diff.has_bidi = This line has hidden Unicode characters +diff.has_escaped = This line has hidden Unicode characters releases.desc = Track project versions and downloads. release.releases = Releases diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index ef60abb4b0e5..0c6ab16c4db0 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -36,7 +36,6 @@ type blameRow struct { CommitMessage string CommitSince gotemplate.HTML Code gotemplate.HTML - HasBIDI bool } // RefBlame render blame page @@ -250,8 +249,8 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m fileName := fmt.Sprintf("%v", ctx.Data["FileName"]) line = highlight.Code(fileName, line) + _, line = charset.EscapeControlString(line) br.Code = gotemplate.HTML(line) - _, br.HasBIDI = charset.ContainsBIDIRuneString(line) rows = append(rows, br) } } diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 5d6a98ed554d..a65c0969100d 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -300,10 +300,11 @@ func LFSFileGet(ctx *context.Context) { rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc)) // Building code view blocks with line number on server side. - fileContent, _ := io.ReadAll(rd) + escapedContent := &bytes.Buffer{} + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent) var output bytes.Buffer - lines := strings.Split(string(fileContent), "\n") + lines := strings.Split(escapedContent.String(), "\n") //Remove blank line at the end of file if len(lines) > 0 && lines[len(lines)-1] == "" { lines = lines[:len(lines)-1] @@ -316,7 +317,6 @@ func LFSFileGet(ctx *context.Context) { output.WriteString(fmt.Sprintf(`
  • %s
  • `, index+1, index+1, line)) } ctx.Data["FileContent"] = gotemplate.HTML(output.String()) - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(output.String()) output.Reset() for i := 0; i < len(lines); i++ { diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index a235cc6d97e7..6f1714e999f4 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -331,24 +331,24 @@ func renderDirectory(ctx *context.Context, treeLink string) { }, rd, &result) if err != nil { log.Error("Render failed: %v then fallback", err) - bs, _ := io.ReadAll(rd) - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(bs) + buf := &bytes.Buffer{} + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf) ctx.Data["FileContent"] = strings.ReplaceAll( - gotemplate.HTMLEscapeString(string(bs)), "\n", `
    `, + gotemplate.HTMLEscapeString(buf.String()), "\n", `
    `, ) } else { - ctx.Data["FileContent"] = result.String() - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) } } else { ctx.Data["IsRenderedHTML"] = true - buf, err = io.ReadAll(rd) + buf := &bytes.Buffer{} + ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, buf) if err != nil { - log.Error("ReadAll failed: %v", err) + log.Error("Read failed: %v", err) } - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) + ctx.Data["FileContent"] = strings.ReplaceAll( - gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, + gotemplate.HTMLEscapeString(buf.String()), "\n", `
    `, ) } } @@ -492,22 +492,28 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ctx.ServerError("Render", err) return } - ctx.Data["FileContent"] = result.String() - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) } else if readmeExist { - buf, _ := io.ReadAll(rd) + buf := &bytes.Buffer{} ctx.Data["IsRenderedHTML"] = true - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) + + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf) + ctx.Data["FileContent"] = strings.ReplaceAll( - gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, + gotemplate.HTMLEscapeString(buf.String()), "\n", `
    `, ) } else { buf, _ := io.ReadAll(rd) lineNums := linesBytesCount(buf) ctx.Data["NumLines"] = strconv.Itoa(lineNums) ctx.Data["NumLinesSet"] = true - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) - ctx.Data["FileContent"] = highlight.File(lineNums, blob.Name(), buf) + fileContent := highlight.File(lineNums, blob.Name(), buf) + status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) + ctx.Data["EscapeStatus"] = status + for i, line := range fileContent { + _, fileContent[i] = charset.EscapeControlString(line) + } + ctx.Data["FileContent"] = fileContent } if !isLFSFile { if ctx.Repo.CanEnableEditor() { @@ -555,8 +561,8 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ctx.ServerError("Render", err) return } - ctx.Data["FileContent"] = result.String() - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) + + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) } } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index c606d76d15c8..6e9fdec42c3b 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -232,8 +232,8 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { ctx.ServerError("Render", err) return nil, nil } - ctx.Data["content"] = buf.String() - ctx.Data["BadBIDI"], ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) + + ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(sidebarContent), &buf); err != nil { @@ -244,8 +244,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["sidebarPresent"] = sidebarContent != nil - ctx.Data["sidebarContent"] = buf.String() - ctx.Data["sidebarBadBIDI"], ctx.Data["sidebarHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) + ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(footerContent), &buf); err != nil { @@ -256,8 +255,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["footerPresent"] = footerContent != nil - ctx.Data["footerContent"] = buf.String() - ctx.Data["footerBadBIDI"], ctx.Data["footerHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) + ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlString(buf.String()) // get commit count - wiki revisions commitsCount, _ := wikiRepo.FileCommitsCount("master", pageFilename) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6e92da0d7de8..c566986165c6 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -117,28 +117,6 @@ func (d *DiffLine) GetCommentSide() string { return d.Comments[0].DiffSide() } -// HasBIDI returns true if there is a BIDI rune in this line -func (d *DiffLine) HasBIDI() bool { - _, any := charset.ContainsBIDIRuneString(d.Content) - return any -} - -// LeftHasBIDI returns true if there is a BIDI rune in this line -func (d *DiffLine) LeftHasBIDI() bool { - if d.LeftIdx > 0 { - return d.HasBIDI() - } - return false -} - -// RightHasBIDI returns true if there is a BIDI rune in this line -func (d *DiffLine) RightHasBIDI() bool { - if d.RightIdx > 0 { - return d.HasBIDI() - } - return false -} - // GetLineTypeMarker returns the line type marker func (d *DiffLine) GetLineTypeMarker() string { if strings.IndexByte(" +-", d.Content[0]) > -1 { @@ -193,6 +171,7 @@ func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int // escape a line's content or return
    needed for copy/paste purposes func getLineContent(content string) string { if len(content) > 0 { + _, content = charset.EscapeControlString(content) return html.EscapeString(content) } return "
    " @@ -504,7 +483,8 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT buf.Write(codeTagSuffix) } } - return template.HTML(buf.Bytes()) + _, content := charset.EscapeControlString(buf.String()) + return template.HTML(content) } // GetLine gets a specific line by type (add or del) and file line number @@ -575,22 +555,26 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem case DiffLineAdd: compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) if compareDiffLine == nil { - return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:])) + _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, diffLine.Content[1:])) + return template.HTML(escaped) } diff1 = compareDiffLine.Content diff2 = diffLine.Content case DiffLineDel: compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) if compareDiffLine == nil { - return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:])) + _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, diffLine.Content[1:])) + return template.HTML(escaped) } diff1 = diffLine.Content diff2 = compareDiffLine.Content default: if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { - return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:])) + _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, diffLine.Content[1:])) + return template.HTML(escaped) } - return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content)) + _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, diffLine.Content)) + return template.HTML(escaped) } diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true) @@ -955,6 +939,7 @@ parsingLoop: decoder := diffLineTypeDecoders[l.Type] if decoder != nil { if c, _, err := transform.String(decoder, l.Content[1:]); err == nil { + l.Content = l.Content[0:1] + c } } diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index d077dc2f34c6..7a3f3c1d9151 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -16,11 +16,13 @@ {{end}} {{.i18n.Tr "repo.normal_view"}} {{.i18n.Tr "repo.file_history"}} + {{.i18n.Tr "repo.unescape_control_characters"}} +
    -
    +
    {{range $row := .BlameRows}} @@ -53,7 +55,7 @@ {{end}} diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index 30039f0d1ab7..35a9e769ba7f 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -94,6 +94,10 @@ {{if $file.IsProtected}} {{$.i18n.Tr "repo.diff.protected"}} {{end}} + {{if not (or $file.IsIncomplete $file.IsBin $file.IsSubmodule)}} + {{$.i18n.Tr "repo.unescape_control_characters"}} + + {{end}} {{if and (not $file.IsSubmodule) (not $.PageIsWiki)}} {{if $file.IsDeleted}} {{$.i18n.Tr "repo.diff.view_file"}} @@ -104,7 +108,7 @@
    -
    +
    {{if or $file.IsIncomplete $file.IsBin}}
    {{if $file.IsIncomplete}} diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index ebc88cffe19a..aed6d784b307 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -22,22 +22,22 @@ {{end}} -
    + {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} - + - + {{else}} - + - + {{end}} {{if and (eq .GetType 3) $hasmatch}} diff --git a/templates/repo/editor/diff_preview.tmpl b/templates/repo/editor/diff_preview.tmpl index 0ed330c57bd3..95e3f438b988 100644 --- a/templates/repo/editor/diff_preview.tmpl +++ b/templates/repo/editor/diff_preview.tmpl @@ -1,6 +1,6 @@
    -
    +
    - {{if $row.HasBIDI}}️⚠{{end}}{{$row.Code}} + {{$row.Code}}
    {{if $line.LeftHasBIDI}}️⚠{{end}}{{$section.GetComputedInlineDiffFor $line}}{{$section.GetComputedInlineDiffFor $line}} {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftHasBIDI}}️⚠{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{if $match.RightIdx}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $match.RightHasBIDI}}️⚠{{end}}{{if $match.RightIdx}}{{$section.GetComputedInlineDiffFor $match}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $match.RightIdx}}{{$section.GetComputedInlineDiffFor $match}}{{end}} {{if $line.LeftIdx}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftHasBIDI}}️⚠{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{if $line.RightIdx}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{svg "octicon-plus"}}{{end}}{{if $line.RightHasBIDI}}️⚠{{end}}{{if $line.RightIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{svg "octicon-plus"}}{{end}}{{if $line.RightIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}}
    {{template "repo/diff/section_unified" dict "file" .File "root" $}} diff --git a/templates/repo/issue/view_content/comments.tmpl b/templates/repo/issue/view_content/comments.tmpl index 51d1e093c86d..343183d07c1f 100644 --- a/templates/repo/issue/view_content/comments.tmpl +++ b/templates/repo/issue/view_content/comments.tmpl @@ -511,7 +511,7 @@ {{$file := (index $diff.Files 0)}}
    -
    +
    {{template "repo/diff/section_unified" dict "file" $file "root" $}} diff --git a/templates/repo/settings/lfs_file.tmpl b/templates/repo/settings/lfs_file.tmpl index 1e92cac3d961..b26773d4c73d 100644 --- a/templates/repo/settings/lfs_file.tmpl +++ b/templates/repo/settings/lfs_file.tmpl @@ -8,17 +8,32 @@

    {{.i18n.Tr "repo.settings.lfs"}} / {{.LFSFile.Oid}}

    - {{if .BadBIDI}} + {{if .EscapeStatus.BadBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_bad_header"}} +
    + {{.i18n.Tr "repo.bidi_bad_description" | Str2html}} +
    + {{else if .EscapeStatus.Escaped}}
    {{svg "octicon-x" 16 "close inside"}}
    - {{.i18n.Tr "repo.bidi_header"}} + {{.i18n.Tr "repo.unicode_header"}}
    - {{.i18n.Tr "repo.bidi_description" | Str2html}} + {{.i18n.Tr "repo.unicode_description" | Str2html}}
    {{end}}
    diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index c05d51cc4992..356ae44c16b0 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -30,7 +30,6 @@
    {{end}}
    - {{if not .ReadmeInList}}
    {{if .HasSourceRenderedToggle}} + {{if not .ReadmeInList}} {{if .Repository.CanEnableEditor}} {{if .CanEditFile}} {{svg "octicon-pencil"}} @@ -60,17 +64,25 @@ {{svg "octicon-trash"}} {{end}} {{end}} -
    {{end}} +
    - {{if .BadBIDI}} + {{if .EscapeStatus.BadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    - {{.i18n.Tr "repo.bidi_header"}} + {{.i18n.Tr "repo.bidi_bad_header"}} +
    + {{.i18n.Tr "repo.bidi_bad_description" | Str2html}} +
    + {{else if .EscapeStatus.Escaped}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.unicode_header"}}
    - {{.i18n.Tr "repo.bidi_description" | Str2html}} + {{.i18n.Tr "repo.unicode_description" | Str2html}}
    {{end}}
    diff --git a/templates/repo/wiki/view.tmpl b/templates/repo/wiki/view.tmpl index 3ba93f8ad346..f8860466760f 100644 --- a/templates/repo/wiki/view.tmpl +++ b/templates/repo/wiki/view.tmpl @@ -45,6 +45,10 @@
    + {{if .EscapeStatus.Escaped}} + {{.i18n.Tr "repo.unescape_control_characters"}} + {{.i18n.Tr "repo.escape_control_characters"}} + {{end}} {{if and .CanWriteWiki (not .Repository.IsMirror)}}
    {{.i18n.Tr "repo.wiki.edit_page_button"}} @@ -62,16 +66,24 @@ {{end}}
    - {{if .BadBIDI}} + {{if .EscapeStatus.BadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    - {{.i18n.Tr "repo.bidi_header"}} + {{.i18n.Tr "repo.bidi_bad_header"}}
    - {{.i18n.Tr "repo.bidi_description" | Str2html}} + {{.i18n.Tr "repo.bidi_bad_description" | Str2html}} +
    + {{else if .EscapeStatus.Escaped}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.unicode_header"}} +
    + {{.i18n.Tr "repo.unicode_description" | Str2html}}
    {{end}} - {{.content | Str2html}} + {{.content | Safe}}
    {{if .sidebarPresent}}
    @@ -79,16 +91,22 @@ {{if and .CanWriteWiki (not .Repository.IsMirror)}} {{svg "octicon-pencil"}} {{end}} - {{if .sidebarBadBIDI}} + {{if .sidebarEscapeStatus.BadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    - {{.i18n.Tr "repo.bidi_header"}} + {{.i18n.Tr "repo.bidi_bad_header"}} +
    +
    + {{else if .sidebarEscapeStatus.Escaped}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.unicode_header"}}
    - {{.i18n.Tr "repo.bidi_description" | Str2html}}
    {{end}} - {{.sidebarContent | Str2html}} + {{.sidebarContent | Safe}}
    {{end}} @@ -98,16 +116,22 @@ {{if and .CanWriteWiki (not .Repository.IsMirror)}} {{svg "octicon-pencil"}} {{end}} - {{if .footerBadBIDI}} + {{if .footerEscapeStatus.BadBIDI}}
    {{svg "octicon-x" 16 "close inside"}}
    - {{.i18n.Tr "repo.bidi_header"}} + {{.i18n.Tr "repo.bidi_bad_header"}} +
    +
    + {{else if .footerEscapeStatus.Escaped}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.unicode_header"}}
    - {{.i18n.Tr "repo.bidi_description" | Str2html}}
    {{end}} - {{.footerContent | Str2html}} + {{.footerContent | Safe}}
    {{end}}
    diff --git a/web_src/js/features/diff.js b/web_src/js/features/diff.js index 75797733b4ea..a4404e618e4c 100644 --- a/web_src/js/features/diff.js +++ b/web_src/js/features/diff.js @@ -23,11 +23,11 @@ export function initDiffShowMore() { }); } -export function initShowBidi() { - $('a.code-has-bidi').on('click', (e) => { +export function initShowEscapeCharacters() { + $('a.code-has-escaped').on('click', (e) => { e.preventDefault(); - $('a.code-has-bidi').each((_, target) => { + $('a.code-has-escaped').each((_, target) => { const inner = $(target).siblings().closest('.code-inner'); const escaped = inner.data('escaped'); let original = inner.data('original'); @@ -41,6 +41,11 @@ export function initShowBidi() { inner.data('original', original); } + inner.html(original.replaceAll(/(?![ \n\t])p{S}|p{C}|p{M}/g, (match) => { + const value = match.charCodeAt(0).toString(16); + return `[U+${value}]`; + })); + inner.html(original.replaceAll(/([\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069])/g, (match) => { const value = match.charCodeAt(0).toString(16); return `&#${value};`; diff --git a/web_src/js/features/repo-legacy.js b/web_src/js/features/repo-legacy.js index 6692d1902c93..4c1e08a6c3ec 100644 --- a/web_src/js/features/repo-legacy.js +++ b/web_src/js/features/repo-legacy.js @@ -546,6 +546,35 @@ export async function initRepository() { initRepoSettingBranches(); initRepoCommonLanguageStats(); + initEscapeButton(); +} + +function initEscapeButton() { + $('a.escape-button').on('click', (e) => { + e.preventDefault(); + $('.file-view').addClass('escaped'); + $('a.escape-button').hide(); + $('a.unescape-button').show(); + }); + $('a.unescape-button').on('click', (e) => { + e.preventDefault(); + $('.file-view').removeClass('escaped'); + $('a.escape-button').show(); + $('a.unescape-button').hide(); + }); + + $('a.escape-diff-button').on('click', (e) => { + e.preventDefault(); + $(e.target).parent().parent().parent().find('.file-code').addClass('escaped'); + $(e.target).hide(); + $(e.target).parent().find('a.unescape-diff-button').show(); + }); + $('a.unescape-diff-button').on('click', (e) => { + e.preventDefault(); + $(e.target).parent().parent().parent().find('.file-code').removeClass('escaped'); + $(e.target).hide(); + $(e.target).parent().find('a.escape-diff-button').show(); + }); } function initRepoIssueQuoteReply() { diff --git a/web_src/js/index.js b/web_src/js/index.js index ecec6683a460..158c9f719ef6 100644 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -20,7 +20,7 @@ import {initNotificationCount, initNotificationsTable} from './features/notifica import {initLastCommitLoader} from './features/lastcommitloader.js'; import {initIssueContentHistory} from './features/issue-content-history.js'; import {initStopwatch} from './features/stopwatch.js'; -import {initDiffShowMore, initShowBidi} from './features/diff.js'; +import {initDiffShowMore, initShowEscapeCharacters} from './features/diff.js'; import {initCommentContent, initMarkupContent} from './markup/content.js'; import {initUserAuthLinkAccountView, initUserAuthOauth2} from './features/user-auth.js'; @@ -135,7 +135,7 @@ $(document).ready(async () => { initRepoReleaseEditor(); initRepoRelease(); initDiffShowMore(); - initShowBidi(); + initShowEscapeCharacters(); initIssueContentHistory(); initAdminUserListSearchForm(); initGlobalCopyToClipboardListener(); diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index ffec4043b5b6..2a459eb38dfb 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -76,6 +76,24 @@ } } + .escaped .escaped-code-point { + &[escaped]::before { + visibility: visible; + content: attr(escaped); + font-family: var(--fonts-monospace); + color: red; + } + + .char { + display: none; + } + } + + .broken-code-point { + font-family: var(--fonts-monospace); + color: blue; + } + .metas { .menu { overflow-x: auto; diff --git a/web_src/less/_review.less b/web_src/less/_review.less index 71d6ed985acf..3414be3fa4a3 100644 --- a/web_src/less/_review.less +++ b/web_src/less/_review.less @@ -20,8 +20,8 @@ opacity: 1; } -.diff-file-box .code-has-bidi, -.blame-code .code-has-bidi { +.diff-file-box .code-has-escaped, +.blame-code .code-has-escaped { opacity: 1; padding-left: 0 !important; padding-right: 0 !important; From 5f481cf83a44fddd7acd6e824aaf2d8da6dfa044 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 9 Nov 2021 14:36:44 +0000 Subject: [PATCH 05/30] placate lint Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/charset/charset.go b/modules/charset/charset.go index d06b096bbaad..75705c6b84e1 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -25,6 +25,7 @@ import ( // UTF8BOM is the utf-8 byte-order marker var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} +// EscapeStatus represents the findings of the unicode escaper type EscapeStatus struct { Escaped bool HasError bool @@ -38,6 +39,7 @@ type EscapeStatus struct { HasLTRScript bool } +// Or combines two EscapeStatus structs into one representing the conjunction of the two func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { status.Escaped = status.Escaped || other.Escaped status.HasError = status.HasError || other.HasError @@ -52,18 +54,21 @@ func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { return status } +// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string func EscapeControlString(text string) (EscapeStatus, string) { sb := &strings.Builder{} escaped, _ := EscapeControlReader(strings.NewReader(text), sb) return escaped, sb.String() } +// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { buf := &bytes.Buffer{} escaped, _ := EscapeControlReader(bytes.NewReader(text), buf) return escaped, buf.Bytes() } +// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { buf := make([]byte, 4096) readStart := 0 From c89c678391bfa8dd395e438c697423f8da7611d6 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 9 Nov 2021 14:42:54 +0000 Subject: [PATCH 06/30] another placation Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/charset/charset.go b/modules/charset/charset.go index 75705c6b84e1..48af223e3e2d 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -121,7 +121,7 @@ readingloop: lineHasRTLScript = false lineHasLTRScript = false - case bytes.Contains([]byte("\r\t "), buf[i:i+size]): + case r == '\r' || r == '\t' || r == ' ': // These are acceptable control characters and space characters case unicode.IsSpace(r): escaped.HasSpaces = true From f563ee9045142f0a847b52b625c63bb97a872db3 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 14 Nov 2021 16:45:36 +0000 Subject: [PATCH 07/30] as per review Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 8 ++--- modules/charset/charset_test.go | 14 ++++---- services/gitdiff/gitdiff.go | 1 - templates/repo/blame.tmpl | 2 +- templates/repo/diff/box.tmpl | 6 ++-- templates/repo/editor/diff_preview.tmpl | 2 +- .../repo/issue/view_content/comments.tmpl | 2 +- templates/repo/settings/lfs_file.tmpl | 9 ++--- web_src/js/features/repo-diff.js | 33 ------------------- web_src/js/features/repo-legacy.js | 29 +--------------- web_src/js/features/repo-unicode-escape.js | 14 ++++++++ web_src/js/index.js | 2 -- web_src/less/_base.less | 4 --- web_src/less/_repository.less | 8 ++--- web_src/less/_review.less | 10 ------ 15 files changed, 39 insertions(+), 105 deletions(-) create mode 100644 web_src/js/features/repo-unicode-escape.js diff --git a/modules/charset/charset.go b/modules/charset/charset.go index 48af223e3e2d..f40664753396 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -132,7 +132,7 @@ readingloop: return } } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { escaped.HasError = true return } @@ -147,7 +147,7 @@ readingloop: } } lineHasBIDI = true - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { escaped.HasError = true return } @@ -161,7 +161,7 @@ readingloop: return } } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { escaped.HasError = true return } @@ -175,7 +175,7 @@ readingloop: return } } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { escaped.HasError = true return } diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index f0bcce8b0ad7..7d51aacf4766 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -300,7 +300,7 @@ var escapeControlTests = []escapeControlTest{ { name: "multi line western non-breaking space", text: "single line western\nmulti line western\n", - result: `single line western` + "\n" + `multi line western` + "\n", + result: `single line western` + "\n" + `multi line western` + "\n", status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true}, }, { @@ -354,7 +354,7 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, status: EscapeStatus{ Escaped: true, @@ -378,7 +378,7 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, { name: "CVE testcase", text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", - result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, + result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true}, }, { @@ -388,9 +388,9 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + "\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n", result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + - "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", + "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, }, } @@ -430,7 +430,7 @@ func TestEscapeControlReader(t *testing.T) { for _, test := range escapeControlTests { test.name += " (+Control)" test.text = "\u001E" + test.text - test.result = `` + "\u001e" + `` + test.result + test.result = `` + "\u001e" + `` + test.result test.status.Escaped = true test.status.HasControls = true tests = append(tests, test) @@ -439,7 +439,7 @@ func TestEscapeControlReader(t *testing.T) { for _, test := range escapeControlTests { test.name += " (+Mark)" test.text = "\u0300" + test.text - test.result = `` + "\u0300" + `` + test.result + test.result = `` + "\u0300" + `` + test.result test.status.Escaped = true test.status.HasMarks = true tests = append(tests, test) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index dcd31878a2d9..eca4f4fb5641 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -944,7 +944,6 @@ parsingLoop: decoder := diffLineTypeDecoders[l.Type] if decoder != nil { if c, _, err := transform.String(decoder, l.Content[1:]); err == nil { - l.Content = l.Content[0:1] + c } } diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index 7a3f3c1d9151..fb3b8c6ff9ae 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -22,7 +22,7 @@
    -
    +
    {{range $row := .BlameRows}} diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index 35a9e769ba7f..dc73a3ef7284 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -95,8 +95,8 @@ {{$.i18n.Tr "repo.diff.protected"}} {{end}} {{if not (or $file.IsIncomplete $file.IsBin $file.IsSubmodule)}} - {{$.i18n.Tr "repo.unescape_control_characters"}} - + {{$.i18n.Tr "repo.unescape_control_characters"}} + {{end}} {{if and (not $file.IsSubmodule) (not $.PageIsWiki)}} {{if $file.IsDeleted}} @@ -108,7 +108,7 @@
    -
    +
    {{if or $file.IsIncomplete $file.IsBin}}
    {{if $file.IsIncomplete}} diff --git a/templates/repo/editor/diff_preview.tmpl b/templates/repo/editor/diff_preview.tmpl index 95e3f438b988..e6956648ca8b 100644 --- a/templates/repo/editor/diff_preview.tmpl +++ b/templates/repo/editor/diff_preview.tmpl @@ -1,6 +1,6 @@
    -
    +
    {{template "repo/diff/section_unified" dict "file" .File "root" $}} diff --git a/templates/repo/issue/view_content/comments.tmpl b/templates/repo/issue/view_content/comments.tmpl index 343183d07c1f..a929ef00c450 100644 --- a/templates/repo/issue/view_content/comments.tmpl +++ b/templates/repo/issue/view_content/comments.tmpl @@ -511,7 +511,7 @@ {{$file := (index $diff.Files 0)}}
    -
    +
    {{template "repo/diff/section_unified" dict "file" $file "root" $}} diff --git a/templates/repo/settings/lfs_file.tmpl b/templates/repo/settings/lfs_file.tmpl index b26773d4c73d..0ef0b5421930 100644 --- a/templates/repo/settings/lfs_file.tmpl +++ b/templates/repo/settings/lfs_file.tmpl @@ -8,13 +8,10 @@

    {{.i18n.Tr "repo.settings.lfs"}} / {{.LFSFile.Oid}}

    diff --git a/web_src/js/features/repo-diff.js b/web_src/js/features/repo-diff.js index 0800e173fc57..76355615c646 100644 --- a/web_src/js/features/repo-diff.js +++ b/web_src/js/features/repo-diff.js @@ -104,36 +104,3 @@ export function initRepoDiffShowMore() { }); }); } - -export function initShowEscapeCharacters() { - $('a.code-has-escaped').on('click', (e) => { - e.preventDefault(); - - $('a.code-has-escaped').each((_, target) => { - const inner = $(target).siblings().closest('.code-inner'); - const escaped = inner.data('escaped'); - let original = inner.data('original'); - - if (escaped) { - inner.html(original); - inner.data('escaped', ''); - } else { - if (!original) { - original = $(inner).html(); - inner.data('original', original); - } - - inner.html(original.replaceAll(/(?![ \n\t])p{S}|p{C}|p{M}/g, (match) => { - const value = match.charCodeAt(0).toString(16); - return `[U+${value}]`; - })); - - inner.html(original.replaceAll(/([\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069])/g, (match) => { - const value = match.charCodeAt(0).toString(16); - return `&#${value};`; - })); - inner.data('escaped', 'escaped'); - } - }); - }); -} diff --git a/web_src/js/features/repo-legacy.js b/web_src/js/features/repo-legacy.js index f244af2c640e..0d2ba782284b 100644 --- a/web_src/js/features/repo-legacy.js +++ b/web_src/js/features/repo-legacy.js @@ -10,6 +10,7 @@ import { initRepoIssueWipToggle, initRepoPullRequestMerge, initRepoPullRequestUpdate, updateIssuesMeta, } from './repo-issue.js'; +import {initEscapeButton} from './repo-unicode-escape.js'; import {svg} from '../svg.js'; import {htmlEscape} from 'escape-goat'; import {initRepoBranchTagDropdown} from '../components/RepoBranchTagDropdown.js'; @@ -549,34 +550,6 @@ export function initRepository() { initEscapeButton(); } -function initEscapeButton() { - $('a.escape-button').on('click', (e) => { - e.preventDefault(); - $('.file-view').addClass('escaped'); - $('a.escape-button').hide(); - $('a.unescape-button').show(); - }); - $('a.unescape-button').on('click', (e) => { - e.preventDefault(); - $('.file-view').removeClass('escaped'); - $('a.escape-button').show(); - $('a.unescape-button').hide(); - }); - - $('a.escape-diff-button').on('click', (e) => { - e.preventDefault(); - $(e.target).parent().parent().parent().find('.file-code').addClass('escaped'); - $(e.target).hide(); - $(e.target).parent().find('a.unescape-diff-button').show(); - }); - $('a.unescape-diff-button').on('click', (e) => { - e.preventDefault(); - $(e.target).parent().parent().parent().find('.file-code').removeClass('escaped'); - $(e.target).hide(); - $(e.target).parent().find('a.escape-diff-button').show(); - }); -} - function initRepoIssueQuoteReply() { // Quote reply $(document).on('click', '.quote-reply', function (event) { diff --git a/web_src/js/features/repo-unicode-escape.js b/web_src/js/features/repo-unicode-escape.js new file mode 100644 index 000000000000..d79b78b9f17e --- /dev/null +++ b/web_src/js/features/repo-unicode-escape.js @@ -0,0 +1,14 @@ +export function initEscapeButton() { + $('a.escape-button').on('click', (e) => { + e.preventDefault(); + $(e.target).parents('.file-content, .non-diff-file-content').find('.file-code, .file-view').addClass('unicode-escaped'); + $(e.target).hide(); + $(e.target).siblings('a.unescape-button').show(); + }); + $('a.unescape-button').on('click', (e) => { + e.preventDefault(); + $(e.target).parents('.file-content, .non-diff-file-content').find('.file-code, .file-view').removeClass('unicode-escaped'); + $(e.target).hide(); + $(e.target).siblings('a.escape-button').show(); + }); +} diff --git a/web_src/js/index.js b/web_src/js/index.js index a23aa65cd6ae..957a0d9e8a7b 100644 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -26,7 +26,6 @@ import { initRepoDiffConversationForm, initRepoDiffFileViewToggle, initRepoDiffReviewButton, initRepoDiffShowMore, - initShowEscapeCharacters, } from './features/repo-diff.js'; import { initRepoIssueDue, @@ -139,7 +138,6 @@ $(document).ready(() => { initRepoDiffFileViewToggle(); initRepoDiffReviewButton(); initRepoDiffShowMore(); - initShowEscapeCharacters(); initRepoEditor(); initRepoGraphGit(); initRepoIssueContentHistory(); diff --git a/web_src/less/_base.less b/web_src/less/_base.less index 01b7c1770292..c09f3a2bdd8a 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -2118,7 +2118,3 @@ table th[data-sortt-desc] { height: 15px; } } - -.escaped-char { - background-color: red; -} diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index 099a960cdfb9..5184012825be 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -76,12 +76,12 @@ } } - .escaped .escaped-code-point { - &[escaped]::before { + .unicode-escaped .escaped-code-point { + &[data-escaped]::before { visibility: visible; - content: attr(escaped); + content: attr(data-escaped); font-family: var(--fonts-monospace); - color: red; + color: var(--color-red); } .char { diff --git a/web_src/less/_review.less b/web_src/less/_review.less index 3414be3fa4a3..12bd6a608a8b 100644 --- a/web_src/less/_review.less +++ b/web_src/less/_review.less @@ -20,16 +20,6 @@ opacity: 1; } -.diff-file-box .code-has-escaped, -.blame-code .code-has-escaped { - opacity: 1; - padding-left: 0 !important; - padding-right: 0 !important; - padding-top: 2px; - padding-bottom: 2px; - font-family: var(--fonts-emoji); -} - .repository .diff-file-box .code-diff .add-comment-left, .repository .diff-file-box .code-diff .add-comment-right, .repository .diff-file-box .code-diff .add-code-comment .add-comment-left, From 62345baded2cfbc8a3f9d295dc68c0dfb621b255 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 16 Nov 2021 16:05:24 +0000 Subject: [PATCH 08/30] fix broken merge Signed-off-by: Andrew Thornton --- templates/repo/view_file.tmpl | 48 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 560054276bae..34d86c39b4ba 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -30,29 +30,29 @@ {{end}} - {{if not .ReadmeInList}} -
    - {{if .HasSourceRenderedToggle}} - - {{end}} -
    - {{.i18n.Tr "repo.file_raw"}} - {{if not .IsViewCommit}} - {{.i18n.Tr "repo.file_permalink"}} - {{end}} - {{if .IsRepresentableAsText}} - {{.i18n.Tr "repo.blame"}} - {{end}} - {{.i18n.Tr "repo.file_history"}} - {{if .EscapeStatus.Escaped}} - - {{.i18n.Tr "repo.escape_control_characters"}} - {{end}} +
    + {{if .HasSourceRenderedToggle}} + - {{svg "octicon-download"}} + {{end}} +
    + {{.i18n.Tr "repo.file_raw"}} + {{if not .IsViewCommit}} + {{.i18n.Tr "repo.file_permalink"}} + {{end}} + {{if .IsRepresentableAsText}} + {{.i18n.Tr "repo.blame"}} + {{end}} + {{.i18n.Tr "repo.file_history"}} + {{if .EscapeStatus.Escaped}} + + {{.i18n.Tr "repo.escape_control_characters"}} + {{end}} +
    + {{svg "octicon-download"}} + {{if not .ReadmeInList}} {{if .Repository.CanEnableEditor}} {{if .CanEditFile}} {{svg "octicon-pencil"}} @@ -65,8 +65,8 @@ {{svg "octicon-trash"}} {{end}} {{end}} -
    - {{end}} + {{end}} +
    {{if .EscapeStatus.BadBIDI}} From 831f1899356828076de13cc6d6017912b542130c Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 16 Nov 2021 16:11:13 +0000 Subject: [PATCH 09/30] as per silverwind Signed-off-by: Andrew Thornton --- options/locale/locale_en-US.ini | 8 ++++---- templates/repo/view_file.tmpl | 4 ++-- web_src/less/_base.less | 4 ++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index cfc96ac932e2..12bcbadfb315 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -978,11 +978,11 @@ file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. bidi_bad_header = `This file contains unexpected BiDi Unicode characters!` -bidi_bad_description = `This file contains unexpected BiDi Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Click escape to reveal hidden characters.` -bidi_bad_description_escaped = `This file contains unexpected BiDi Unicode characters.
    Hidden unicode characters are escaped below.
    Click unescape to show how they render.` +bidi_bad_description = `This file contains unexpected BiDi Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` +bidi_bad_description_escaped = `This file contains unexpected BiDi Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` unicode_header = `This file contains hidden Unicode characters!` -unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Click escape to reveal hidden characters.` -unicode_description_escaped = `This file contains hidden Unicode characters.
    Hidden unicode characters are escaped below.
    Click unescape to show how they render.` +unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` +unicode_description_escaped = `This file contains hidden Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` escape_control_characters = Escape unescape_control_characters = Unescape diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 34d86c39b4ba..b6751cb76da9 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -70,7 +70,7 @@
    {{if .EscapeStatus.BadBIDI}} -
    +
    {{svg "octicon-x" 16 "close inside"}}
    {{.i18n.Tr "repo.bidi_bad_header"}} @@ -78,7 +78,7 @@ {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}
    {{else if .EscapeStatus.Escaped}} -
    +
    {{svg "octicon-x" 16 "close inside"}}
    {{.i18n.Tr "repo.unicode_header"}} diff --git a/web_src/less/_base.less b/web_src/less/_base.less index c09f3a2bdd8a..81eac977a45d 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -587,6 +587,10 @@ a.ui.card:hover, color: var(--color-text-dark); } +.ui.error.message .header { + color: var(--color-red); +} + .dont-break-out { overflow-wrap: break-word; word-wrap: break-word; From 5a9759c11fb21b1c304b90007f21d8c044df2612 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 16 Nov 2021 16:38:42 +0000 Subject: [PATCH 10/30] as per silverwind Signed-off-by: Andrew Thornton --- web_src/less/themes/theme-arc-green.less | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web_src/less/themes/theme-arc-green.less b/web_src/less/themes/theme-arc-green.less index a595848b76cf..59ecd965a864 100644 --- a/web_src/less/themes/theme-arc-green.less +++ b/web_src/less/themes/theme-arc-green.less @@ -357,6 +357,9 @@ td.blob-hunk { } .ui.warning.message { + .header { + color: #fff1dc; + } color: #ec8; box-shadow: 0 0 0 1px #ec8; } From 63a5e0fc4745bea53da00a86dd20165543c780db Mon Sep 17 00:00:00 2001 From: silverwind Date: Tue, 16 Nov 2021 22:35:31 +0100 Subject: [PATCH 11/30] fix class --- templates/repo/view_file.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 59fb1306bf22..9ed484720d55 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -70,7 +70,7 @@
    {{if .EscapeStatus.BadBIDI}} -
    +
    {{svg "octicon-x" 16 "close inside"}}
    {{.i18n.Tr "repo.bidi_bad_header"}} From 8a01b2268a650cd33b0c517228c4640e5519652e Mon Sep 17 00:00:00 2001 From: silverwind Date: Tue, 16 Nov 2021 22:46:18 +0100 Subject: [PATCH 12/30] make message header colors work on both themes --- web_src/less/_base.less | 6 ++++-- web_src/less/themes/theme-arc-green.less | 3 --- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/web_src/less/_base.less b/web_src/less/_base.less index 81eac977a45d..74a5e71445dd 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -587,8 +587,10 @@ a.ui.card:hover, color: var(--color-text-dark); } -.ui.error.message .header { - color: var(--color-red); +.ui.error.message .header, +.ui.warning.message .header { + color: inherit; + filter: saturate(2); } .dont-break-out { diff --git a/web_src/less/themes/theme-arc-green.less b/web_src/less/themes/theme-arc-green.less index 59ecd965a864..a595848b76cf 100644 --- a/web_src/less/themes/theme-arc-green.less +++ b/web_src/less/themes/theme-arc-green.less @@ -357,9 +357,6 @@ td.blob-hunk { } .ui.warning.message { - .header { - color: #fff1dc; - } color: #ec8; box-shadow: 0 0 0 1px #ec8; } From 6449cad971ab65b1d6979cb28c525b121e00c8a1 Mon Sep 17 00:00:00 2001 From: silverwind Date: Tue, 16 Nov 2021 22:52:18 +0100 Subject: [PATCH 13/30] minor styling tweaks --- templates/repo/view_file.tmpl | 4 ++-- web_src/less/_repository.less | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 9ed484720d55..88540a5ad6a9 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -70,7 +70,7 @@
    {{if .EscapeStatus.BadBIDI}} -
    +
    {{svg "octicon-x" 16 "close inside"}}
    {{.i18n.Tr "repo.bidi_bad_header"}} @@ -78,7 +78,7 @@ {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}
    {{else if .EscapeStatus.Escaped}} -
    +
    {{svg "octicon-x" 16 "close inside"}}
    {{.i18n.Tr "repo.unicode_header"}} diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index 5184012825be..bb26c9d82f57 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -3093,6 +3093,11 @@ td.blob-excerpt { padding-left: 8px; } +.file-message { + margin-bottom: 0 !important; + border-radius: 0 !important; +} + .webhook-info { padding: 7px 12px; margin: 10px 0; From ab03673d04adaf82cdc9dfb4e4f322bc338630f6 Mon Sep 17 00:00:00 2001 From: silverwind Date: Tue, 16 Nov 2021 23:15:27 +0100 Subject: [PATCH 14/30] fix border-radius on unescape button --- web_src/less/_repository.less | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index bb26c9d82f57..dcdafc302c00 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -3098,6 +3098,12 @@ td.blob-excerpt { border-radius: 0 !important; } +/* fomantic's last-child selector does not work with hidden last child */ +.ui.buttons .unescape-button { + border-top-right-radius: .28571429rem; + border-bottom-right-radius: .28571429rem; +} + .webhook-info { padding: 7px 12px; margin: 10px 0; From b93d0bf3817a6df02fc4855a99050f21b9c6fae1 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 18 Nov 2021 17:54:37 +0000 Subject: [PATCH 15/30] drop buttons as per silverwind Signed-off-by: Andrew Thornton --- templates/repo/view_file.tmpl | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index a78decee7773..d107fbcd4fee 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -37,22 +37,22 @@ {{svg "octicon-file" 15}}
    {{end}} -
    - {{.i18n.Tr "repo.file_raw"}} - {{if not .IsViewCommit}} - {{.i18n.Tr "repo.file_permalink"}} - {{end}} - {{if .IsRepresentableAsText}} - {{.i18n.Tr "repo.blame"}} - {{end}} - {{.i18n.Tr "repo.file_history"}} - {{if .EscapeStatus.Escaped}} - - {{.i18n.Tr "repo.escape_control_characters"}} - {{end}} -
    - {{svg "octicon-download"}} {{if not .ReadmeInList}} +
    + {{.i18n.Tr "repo.file_raw"}} + {{if not .IsViewCommit}} + {{.i18n.Tr "repo.file_permalink"}} + {{end}} + {{if .IsRepresentableAsText}} + {{.i18n.Tr "repo.blame"}} + {{end}} + {{.i18n.Tr "repo.file_history"}} + {{if .EscapeStatus.Escaped}} + + {{.i18n.Tr "repo.escape_control_characters"}} + {{end}} +
    + {{svg "octicon-download"}} {{if .Repository.CanEnableEditor}} {{if .CanEditFile}} {{svg "octicon-pencil"}} @@ -65,6 +65,9 @@ {{svg "octicon-trash"}} {{end}} {{end}} + {{else if .EscapeStatus.Escaped}} + + {{.i18n.Tr "repo.escape_control_characters"}} {{end}}
    From cf04f2ef8fc0bbf588064593b514251f2fedf52e Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 18 Nov 2021 20:07:51 +0000 Subject: [PATCH 16/30] as per fnetx Signed-off-by: Andrew Thornton --- templates/repo/diff/box.tmpl | 4 ++-- templates/repo/settings/lfs_file.tmpl | 4 ++-- templates/repo/view_file.tmpl | 4 ++-- templates/repo/wiki/view.tmpl | 8 ++++---- web_src/less/_repository.less | 10 ++++++++++ 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index aba2ad095735..b9bc03441cc6 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -95,8 +95,8 @@ {{$.i18n.Tr "repo.diff.protected"}} {{end}} {{if not (or $file.IsIncomplete $file.IsBin $file.IsSubmodule)}} - {{$.i18n.Tr "repo.unescape_control_characters"}} - + {{$.i18n.Tr "repo.unescape_control_characters"}} + {{end}} {{if and (not $file.IsSubmodule) (not $.PageIsWiki)}} {{if $file.IsDeleted}} diff --git a/templates/repo/settings/lfs_file.tmpl b/templates/repo/settings/lfs_file.tmpl index 4425b52d5ff8..50a64685df56 100644 --- a/templates/repo/settings/lfs_file.tmpl +++ b/templates/repo/settings/lfs_file.tmpl @@ -22,7 +22,7 @@
    {{.i18n.Tr "repo.bidi_bad_header"}}
    - {{.i18n.Tr "repo.bidi_bad_description" | Str2html}} +

    {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}

    {{else if .EscapeStatus.Escaped}}
    @@ -30,7 +30,7 @@
    {{.i18n.Tr "repo.unicode_header"}}
    - {{.i18n.Tr "repo.unicode_description" | Str2html}} +

    {{.i18n.Tr "repo.unicode_description" | Str2html}}

    {{end}}
    diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index d107fbcd4fee..04c1d7e9b6f6 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -78,7 +78,7 @@
    {{.i18n.Tr "repo.bidi_bad_header"}}
    - {{.i18n.Tr "repo.bidi_bad_description" | Str2html}} +

    {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}

    {{else if .EscapeStatus.Escaped}}
    @@ -86,7 +86,7 @@
    {{.i18n.Tr "repo.unicode_header"}}
    - {{.i18n.Tr "repo.unicode_description" | Str2html}} +

    {{.i18n.Tr "repo.unicode_description" | Str2html}}

    {{end}}
    diff --git a/templates/repo/wiki/view.tmpl b/templates/repo/wiki/view.tmpl index 038aa6c767a0..946094808407 100644 --- a/templates/repo/wiki/view.tmpl +++ b/templates/repo/wiki/view.tmpl @@ -67,20 +67,20 @@
    {{if .EscapeStatus.BadBIDI}} -
    +
    {{svg "octicon-x" 16 "close inside"}}
    {{.i18n.Tr "repo.bidi_bad_header"}}
    - {{.i18n.Tr "repo.bidi_bad_description" | Str2html}} +

    {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}

    {{else if .EscapeStatus.Escaped}} -
    +
    {{svg "octicon-x" 16 "close inside"}}
    {{.i18n.Tr "repo.unicode_header"}}
    - {{.i18n.Tr "repo.unicode_description" | Str2html}} +

    {{.i18n.Tr "repo.unicode_description" | Str2html}}

    {{end}} {{.content | Safe}} diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index dcdafc302c00..1576e704642e 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -3095,7 +3095,17 @@ td.blob-excerpt { .file-message { margin-bottom: 0 !important; +} + +.file-message, +.wiki-message { border-radius: 0 !important; + display: flex; + flex-direction: column; + p { + text-align: left; + align-self: center; + } } /* fomantic's last-child selector does not work with hidden last child */ From aa4fc5a2e6c549935b734513fa4a5bcfeab49003 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 18 Nov 2021 20:10:19 +0000 Subject: [PATCH 17/30] hide the unescape button in the wiki Signed-off-by: Andrew Thornton --- templates/repo/wiki/view.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/wiki/view.tmpl b/templates/repo/wiki/view.tmpl index 946094808407..f8b07dfdefe8 100644 --- a/templates/repo/wiki/view.tmpl +++ b/templates/repo/wiki/view.tmpl @@ -46,7 +46,7 @@
    {{if .EscapeStatus.Escaped}} - {{.i18n.Tr "repo.unescape_control_characters"}} + {{.i18n.Tr "repo.escape_control_characters"}} {{end}} {{if and .CanWriteWiki (not .Repository.IsMirror)}} From 62f557d9755b47db7cfbb443767f87481fcecfca Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 18 Nov 2021 21:18:59 +0000 Subject: [PATCH 18/30] add warning triangles to view and blame Signed-off-by: Andrew Thornton --- options/locale/locale_en-US.ini | 1 + routers/web/repo/blame.go | 6 +++++- routers/web/repo/view.go | 4 +++- templates/repo/blame.tmpl | 3 +++ templates/repo/view_file.tmpl | 3 +++ web_src/js/features/repo-unicode-escape.js | 14 ++++++++++++++ web_src/less/_base.less | 4 ++++ 7 files changed, 33 insertions(+), 2 deletions(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index ce51a2cb67ee..34ff1202952f 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -985,6 +985,7 @@ bidi_bad_description_escaped = `This file contains unexpected BiDi Unicode chara unicode_header = `This file contains hidden Unicode characters!` unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` unicode_description_escaped = `This file contains hidden Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` +line_unicode = `This line has hidden unicode characters` escape_control_characters = Escape unescape_control_characters = Unescape diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index 7b5cc8db665c..0f185fb6d874 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -39,6 +39,7 @@ type blameRow struct { CommitMessage string CommitSince gotemplate.HTML Code gotemplate.HTML + EscapeStatus charset.EscapeStatus } // RefBlame render blame page @@ -233,6 +234,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m } var lines = make([]string, 0) rows := make([]*blameRow, 0) + escapeStatus := charset.EscapeStatus{} var i = 0 var commitCnt = 0 @@ -277,12 +279,14 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m fileName := fmt.Sprintf("%v", ctx.Data["FileName"]) line = highlight.Code(fileName, language, line) - _, line = charset.EscapeControlString(line) + br.EscapeStatus, line = charset.EscapeControlString(line) br.Code = gotemplate.HTML(line) rows = append(rows, br) + escapeStatus = escapeStatus.Or(br.EscapeStatus) } } + ctx.Data["EscapeStatus"] = escapeStatus ctx.Data["BlameRows"] = rows ctx.Data["CommitCnt"] = commitCnt } diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 985efd45f9b2..cddc88c401e6 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -537,10 +537,12 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st fileContent := highlight.File(lineNums, blob.Name(), language, buf) status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) ctx.Data["EscapeStatus"] = status + statuses := make([]charset.EscapeStatus, len(fileContent)) for i, line := range fileContent { - _, fileContent[i] = charset.EscapeControlString(line) + statuses[i], fileContent[i] = charset.EscapeControlString(line) } ctx.Data["FileContent"] = fileContent + ctx.Data["LineEscapeStatus"] = statuses } if !isLFSFile { if ctx.Repo.CanEnableEditor() { diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index 3c04d52c7c03..7850b6ccd8ba 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -54,6 +54,9 @@
    + {{if $.EscapeStatus.Escaped}} + + {{end}} diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 04c1d7e9b6f6..e7df2d7aad63 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -130,6 +130,9 @@ + {{if $.EscapeStatus.Escaped}} + + {{end}} diff --git a/web_src/js/features/repo-unicode-escape.js b/web_src/js/features/repo-unicode-escape.js index d79b78b9f17e..abd886f8502d 100644 --- a/web_src/js/features/repo-unicode-escape.js +++ b/web_src/js/features/repo-unicode-escape.js @@ -11,4 +11,18 @@ export function initEscapeButton() { $(e.target).hide(); $(e.target).siblings('a.escape-button').show(); }); + $('a.toggle-escape-button').on('click', (e) => { + e.preventDefault(); + const fileContent = $(e.target).parents('.file-content, .non-diff-file-content'); + const fileView = fileContent.find('.file-code, .file-view'); + if (fileView.hasClass('unicode-escaped')) { + fileView.removeClass('unicode-escaped'); + fileContent.find('a.unescape-button').hide(); + fileContent.find('a.escape-button').show(); + } else { + fileView.addClass('unicode-escaped'); + fileContent.find('a.unescape-button').show(); + fileContent.find('a.escape-button').hide(); + } + }); } diff --git a/web_src/less/_base.less b/web_src/less/_base.less index 74a5e71445dd..524aaf4cbfe0 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -1527,6 +1527,10 @@ a.ui.label:hover { } } +.lines-escape { + width: 0; +} + .lines-code { background-color: var(--color-code-bg); padding-left: 5px; From b6ba9580b831a53a1ddee389ceb4c5bcaa754334 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 18 Nov 2021 22:49:40 +0000 Subject: [PATCH 19/30] Add warning triangles to diff Signed-off-by: Andrew Thornton --- services/gitdiff/gitdiff.go | 45 +++++++++++-------- services/gitdiff/gitdiff_test.go | 20 ++++----- templates/repo/diff/blob_excerpt.tmpl | 18 ++++++-- templates/repo/diff/section_split.tmpl | 56 +++++++++++++++++++++--- templates/repo/diff/section_unified.tmpl | 15 ++++++- web_src/less/_review.less | 7 +++ 6 files changed, 122 insertions(+), 39 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 3b52943a3209..482d58a98eb9 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -168,12 +168,15 @@ func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int } // escape a line's content or return
    needed for copy/paste purposes -func getLineContent(content string) string { +func getLineContent(content string) DiffInline { if len(content) > 0 { - _, content = charset.EscapeControlString(content) - return html.EscapeString(content) + status, content := charset.EscapeControlString(content) + return DiffInline{ + EscapeStatus: status, + Content: template.HTML(html.EscapeString(content)), + } } - return "
    " + return DiffInline{Content: "
    "} } // DiffSection represents a section of a DiffFile. @@ -411,7 +414,7 @@ func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff { return fixedup } -func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML { +func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { buf := bytes.NewBuffer(nil) match := "" @@ -483,8 +486,8 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT buf.Write(codeTagSuffix) } } - _, content := charset.EscapeControlString(buf.String()) - return template.HTML(content) + status, content := charset.EscapeControlString(buf.String()) + return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } // GetLine gets a specific line by type (add or del) and file line number @@ -536,10 +539,16 @@ func init() { diffMatchPatch.DiffEditCost = 100 } +// DiffInline is a struct that has a content and escape status +type DiffInline struct { + EscapeStatus charset.EscapeStatus + Content template.HTML +} + // GetComputedInlineDiffFor computes inline diff for the given line. -func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML { +func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { if setting.Git.DisableDiffHighlight { - return template.HTML(getLineContent(diffLine.Content[1:])) + return getLineContent(diffLine.Content[1:]) } var ( @@ -556,30 +565,30 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem // try to find equivalent diff line. ignore, otherwise switch diffLine.Type { case DiffLineSection: - return template.HTML(getLineContent(diffLine.Content[1:])) + return getLineContent(diffLine.Content[1:]) case DiffLineAdd: compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) if compareDiffLine == nil { - _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) - return template.HTML(escaped) + status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) + return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} } diff1 = compareDiffLine.Content diff2 = diffLine.Content case DiffLineDel: compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) if compareDiffLine == nil { - _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) - return template.HTML(escaped) + status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) + return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} } diff1 = diffLine.Content diff2 = compareDiffLine.Content default: if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { - _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) - return template.HTML(escaped) + status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) + return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} } - _, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content)) - return template.HTML(escaped) + status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content)) + return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} } diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true) diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index c6c6f3b0e358..e7e583ba95ac 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -37,14 +37,14 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffInsert, Text: "bar"}, {Type: dmp.DiffDelete, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineAdd)) + }, DiffLineAdd).Content) assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffDelete, Text: "bar"}, {Type: dmp.DiffInsert, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineDel)) + }, DiffLineDel).Content) assertEqual(t, "if !nohl && (lexer != nil || r.GuessLanguage) {", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "if !nohl && lexer != nil"}, {Type: dmp.DiffInsert, Text: " || r.GuessLanguage)"}, {Type: dmp.DiffEqual, Text: " {"}, - }, DiffLineAdd)) + }, DiffLineAdd).Content) assertEqual(t, "tagURL := fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone\", ge.BaseURL, ge.Owner, ge.Repo, from, milestoneID, time.Now().Format("2006-01-02"))", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "tagURL := , milestoneID, time.Now().Format("2006-01-02")"}, {Type: dmp.DiffInsert, Text: "ge.Milestone, from, milestoneID"}, {Type: dmp.DiffEqual, Text: ")"}, - }, DiffLineDel)) + }, DiffLineDel).Content) assertEqual(t, "r.WrapperRenderer(w, language, true, attrs, false)", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "r.WrapperRenderer(w, "}, @@ -70,14 +70,14 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffEqual, Text: "c"}, {Type: dmp.DiffDelete, Text: "lass=\"p\">, true, attrs"}, {Type: dmp.DiffEqual, Text: ", false)"}, - }, DiffLineDel)) + }, DiffLineDel).Content) assertEqual(t, "language, true, attrs, false)", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffInsert, Text: "language, true, attrs"}, {Type: dmp.DiffEqual, Text: ", false)"}, - }, DiffLineAdd)) + }, DiffLineAdd).Content) assertEqual(t, "print("// ", sys.argv)", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "print"}, @@ -86,14 +86,14 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffInsert, Text: "class=\"p\">("}, {Type: dmp.DiffEqual, Text: ""// ", sys.argv"}, {Type: dmp.DiffInsert, Text: ")"}, - }, DiffLineAdd)) + }, DiffLineAdd).Content) assertEqual(t, "sh 'useradd -u $(stat -c "%u" .gitignore) jenkins'", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "sh "}, {Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins""}, {Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c "%u" .gitignore) jenkins'"}, {Type: dmp.DiffEqual, Text: ";"}, - }, DiffLineAdd)) + }, DiffLineAdd).Content) assertEqual(t, " <h4 class="release-list-title df ac">", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: " <h"}, @@ -101,7 +101,7 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffEqual, Text: "3"}, {Type: dmp.DiffInsert, Text: "4;release-list-title df ac""}, {Type: dmp.DiffEqual, Text: ">"}, - }, DiffLineAdd)) + }, DiffLineAdd).Content) } func TestParsePatch_singlefile(t *testing.T) { @@ -539,7 +539,7 @@ func TestDiffToHTML_14231(t *testing.T) { expected := ` run(db)` output := diffToHTML("main.v", diffRecord, DiffLineAdd) - assertEqual(t, expected, output) + assertEqual(t, expected, output.Content) } func TestNoCrashes(t *testing.T) { diff --git a/templates/repo/diff/blob_excerpt.tmpl b/templates/repo/diff/blob_excerpt.tmpl index 792c539ac592..e529ed3bcd07 100644 --- a/templates/repo/diff/blob_excerpt.tmpl +++ b/templates/repo/diff/blob_excerpt.tmpl @@ -19,14 +19,26 @@ {{end}} -
    + {{else}} - + - + {{end}} {{end}} diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index fb6977e204e6..5e133a184367 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -21,23 +21,67 @@ {{svg "octicon-fold"}} {{end}} - - + {{$inlineDiff := $section.GetComputedInlineDiffFor $line}} + {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} - + - + {{else}} - + - + {{end}} {{if and (eq .GetType 3) $hasmatch}} diff --git a/templates/repo/diff/section_unified.tmpl b/templates/repo/diff/section_unified.tmpl index 57e8fb9a1603..ad813c9ee28c 100644 --- a/templates/repo/diff/section_unified.tmpl +++ b/templates/repo/diff/section_unified.tmpl @@ -27,9 +27,20 @@ {{end}} {{if eq .GetType 4}} - + {{else}} - + {{end}} {{if gt (len $line.Comments) 0}} diff --git a/web_src/less/_review.less b/web_src/less/_review.less index 12bd6a608a8b..89cb9fe4bcf8 100644 --- a/web_src/less/_review.less +++ b/web_src/less/_review.less @@ -16,6 +16,13 @@ } } +.diff-file-box .lines-code .code-inner.has-escaped::before { + visibility: visible; + content: '⚠️'; + font-family: var(--fonts-emoji); + color: var(--color-red); +} + .diff-file-box .lines-code:hover .ui.button.add-code-comment { opacity: 1; } From 6a2e274adc68863f180c9299660140ffd3d37368 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 29 Nov 2021 18:44:33 +0000 Subject: [PATCH 20/30] ensure buttons work on loaded diffs Signed-off-by: Andrew Thornton --- web_src/js/features/repo-unicode-escape.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web_src/js/features/repo-unicode-escape.js b/web_src/js/features/repo-unicode-escape.js index abd886f8502d..22620958eb2b 100644 --- a/web_src/js/features/repo-unicode-escape.js +++ b/web_src/js/features/repo-unicode-escape.js @@ -1,17 +1,17 @@ export function initEscapeButton() { - $('a.escape-button').on('click', (e) => { + $(document, 'a.escape-button').on('click', (e) => { e.preventDefault(); $(e.target).parents('.file-content, .non-diff-file-content').find('.file-code, .file-view').addClass('unicode-escaped'); $(e.target).hide(); $(e.target).siblings('a.unescape-button').show(); }); - $('a.unescape-button').on('click', (e) => { + $(document, 'a.unescape-button').on('click', (e) => { e.preventDefault(); $(e.target).parents('.file-content, .non-diff-file-content').find('.file-code, .file-view').removeClass('unicode-escaped'); $(e.target).hide(); $(e.target).siblings('a.escape-button').show(); }); - $('a.toggle-escape-button').on('click', (e) => { + $(document, 'a.toggle-escape-button').on('click', (e) => { e.preventDefault(); const fileContent = $(e.target).parents('.file-content, .non-diff-file-content'); const fileView = fileContent.find('.file-code, .file-view'); From 0d6e8f68c3e83476eb05d19de707ad639616aab8 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 29 Nov 2021 18:47:42 +0000 Subject: [PATCH 21/30] move escape functions into their own files Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 205 ------------------------------ modules/charset/charset_test.go | 192 ---------------------------- modules/charset/escape.go | 219 ++++++++++++++++++++++++++++++++ modules/charset/escape_test.go | 202 +++++++++++++++++++++++++++++ 4 files changed, 421 insertions(+), 397 deletions(-) create mode 100644 modules/charset/escape.go create mode 100644 modules/charset/escape_test.go diff --git a/modules/charset/charset.go b/modules/charset/charset.go index f40664753396..ae5cf5aa1a4e 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -9,7 +9,6 @@ import ( "fmt" "io" "strings" - "unicode" "unicode/utf8" "code.gitea.io/gitea/modules/log" @@ -19,215 +18,11 @@ import ( "github.com/gogs/chardet" "golang.org/x/net/html/charset" "golang.org/x/text/transform" - "golang.org/x/text/unicode/bidi" ) // UTF8BOM is the utf-8 byte-order marker var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} -// EscapeStatus represents the findings of the unicode escaper -type EscapeStatus struct { - Escaped bool - HasError bool - HasBadRunes bool - HasControls bool - HasSpaces bool - HasMarks bool - HasBIDI bool - BadBIDI bool - HasRTLScript bool - HasLTRScript bool -} - -// Or combines two EscapeStatus structs into one representing the conjunction of the two -func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { - status.Escaped = status.Escaped || other.Escaped - status.HasError = status.HasError || other.HasError - status.HasBadRunes = status.HasBadRunes || other.HasBadRunes - status.HasControls = status.HasControls || other.HasControls - status.HasSpaces = status.HasSpaces || other.HasSpaces - status.HasMarks = status.HasMarks || other.HasMarks - status.HasBIDI = status.HasBIDI || other.HasBIDI - status.BadBIDI = status.BadBIDI || other.BadBIDI - status.HasRTLScript = status.HasRTLScript || other.HasRTLScript - status.HasLTRScript = status.HasLTRScript || other.HasLTRScript - return status -} - -// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string -func EscapeControlString(text string) (EscapeStatus, string) { - sb := &strings.Builder{} - escaped, _ := EscapeControlReader(strings.NewReader(text), sb) - return escaped, sb.String() -} - -// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte -func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { - buf := &bytes.Buffer{} - escaped, _ := EscapeControlReader(bytes.NewReader(text), buf) - return escaped, buf.Bytes() -} - -// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error -func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { - buf := make([]byte, 4096) - readStart := 0 - var n int - var writePos int - - lineHasBIDI := false - lineHasRTLScript := false - lineHasLTRScript := false - -readingloop: - for err == nil { - n, err = text.Read(buf[readStart:]) - bs := buf[:n+readStart] - i := 0 - - for i < len(bs) { - r, size := utf8.DecodeRune(bs[i:]) - // Now handle the codepoints - switch { - case r == utf8.RuneError: - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - writePos = i - } - // runes can be at most 4 bytes - so... - if len(bs)-i <= 3 { - // if not request more data - copy(buf, bs[i:]) - readStart = n - i - writePos = 0 - continue readingloop - } - // this is a real broken rune - escaped.HasBadRunes = true - escaped.Escaped = true - if _, err = fmt.Fprintf(output, `<%X>`, bs[i:i+size]); err != nil { - escaped.HasError = true - return - } - writePos += size - case r == '\n': - if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - escaped.BadBIDI = true - } - lineHasBIDI = false - lineHasRTLScript = false - lineHasLTRScript = false - - case r == '\r' || r == '\t' || r == ' ': - // These are acceptable control characters and space characters - case unicode.IsSpace(r): - escaped.HasSpaces = true - escaped.Escaped = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.Bidi_Control, r): - escaped.Escaped = true - escaped.HasBIDI = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - lineHasBIDI = true - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.C, r): - escaped.Escaped = true - escaped.HasControls = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.M, r): - escaped.Escaped = true - escaped.HasMarks = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - default: - p, _ := bidi.Lookup(bs[i : i+size]) - c := p.Class() - if c == bidi.R || c == bidi.AL { - lineHasRTLScript = true - escaped.HasRTLScript = true - } else if c == bidi.L { - lineHasLTRScript = true - escaped.HasLTRScript = true - } - } - i += size - } - if n > 0 { - // we read something... - // write everything unwritten - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - - // reset the starting positions for the next read - readStart = 0 - writePos = 0 - } - } - if readStart > 0 { - // this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round - escaped.Escaped = true - escaped.HasBadRunes = true - if _, err = fmt.Fprintf(output, `<%X>`, buf[:readStart]); err != nil { - escaped.HasError = true - return - } - } - if err == io.EOF { - if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - escaped.BadBIDI = true - } - err = nil - return - } - escaped.HasError = true - return -} - // ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible func ToUTF8WithFallbackReader(rd io.Reader) io.Reader { var buf = make([]byte, 2048) diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index 7d51aacf4766..33f0c10a7a20 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -5,7 +5,6 @@ package charset import ( - "reflect" "strings" "testing" @@ -273,194 +272,3 @@ func stringMustEndWith(t *testing.T, expected string, value string) { func bytesMustStartWith(t *testing.T, expected []byte, value []byte) { assert.Equal(t, expected, value[:len(expected)]) } - -type escapeControlTest struct { - name string - text string - status EscapeStatus - result string -} - -var escapeControlTests = []escapeControlTest{ - { - name: "", - }, - { - name: "single line western", - text: "single line western", - result: "single line western", - status: EscapeStatus{HasLTRScript: true}, - }, - { - name: "multi line western", - text: "single line western\nmulti line western\n", - result: "single line western\nmulti line western\n", - status: EscapeStatus{HasLTRScript: true}, - }, - { - name: "multi line western non-breaking space", - text: "single line western\nmulti line western\n", - result: `single line western` + "\n" + `multi line western` + "\n", - status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true}, - }, - { - name: "mixed scripts: western + japanese", - text: "日属秘ぞしちゅ。Then some western.", - result: "日属秘ぞしちゅ。Then some western.", - status: EscapeStatus{HasLTRScript: true}, - }, - { - name: "japanese", - text: "日属秘ぞしちゅ。", - result: "日属秘ぞしちゅ。", - status: EscapeStatus{HasLTRScript: true}, - }, - { - name: "hebrew", - text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", - result: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", - status: EscapeStatus{HasRTLScript: true}, - }, - { - name: "more hebrew", - text: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. - - המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. - - בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, - result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. - - המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. - - בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, - status: EscapeStatus{HasRTLScript: true}, - }, - { - name: "Mixed RTL+LTR", - text: `Many computer programs fail to display bidirectional text correctly. -For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), -then resh (ר), and finally heh (ה) (which should appear leftmost).`, - result: `Many computer programs fail to display bidirectional text correctly. -For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), -then resh (ר), and finally heh (ה) (which should appear leftmost).`, - status: EscapeStatus{ - HasRTLScript: true, - HasLTRScript: true, - }, - }, - { - name: "Mixed RTL+LTR+BIDI", - text: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + - `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, - result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + - `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, - status: EscapeStatus{ - Escaped: true, - HasBIDI: true, - HasRTLScript: true, - HasLTRScript: true, - }, - }, - { - name: "Accented characters", - text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), - result: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), - status: EscapeStatus{HasLTRScript: true}, - }, - { - name: "Program", - text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", - result: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", - status: EscapeStatus{HasLTRScript: true}, - }, - { - name: "CVE testcase", - text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", - result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, - status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true}, - }, - { - name: "Mixed testcase with fail", - text: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + - `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + - "\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n", - result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + - `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + - "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", - status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, - }, -} - -func TestEscapeControlString(t *testing.T) { - for _, tt := range escapeControlTests { - t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlString(tt.text) - if !reflect.DeepEqual(status, tt.status) { - t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) - } - if result != tt.result { - t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result) - } - }) - } -} - -func TestEscapeControlBytes(t *testing.T) { - for _, tt := range escapeControlTests { - t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlBytes([]byte(tt.text)) - if !reflect.DeepEqual(status, tt.status) { - t.Errorf("EscapeControlBytes() status = %v, wanted= %v", status, tt.status) - } - if string(result) != tt.result { - t.Errorf("EscapeControlBytes()\nresult= %v,\nwanted= %v", result, tt.result) - } - }) - } -} - -func TestEscapeControlReader(t *testing.T) { - // lets add some control characters to the tests - tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) - copy(tests, escapeControlTests) - for _, test := range escapeControlTests { - test.name += " (+Control)" - test.text = "\u001E" + test.text - test.result = `` + "\u001e" + `` + test.result - test.status.Escaped = true - test.status.HasControls = true - tests = append(tests, test) - } - - for _, test := range escapeControlTests { - test.name += " (+Mark)" - test.text = "\u0300" + test.text - test.result = `` + "\u0300" + `` + test.result - test.status.Escaped = true - test.status.HasMarks = true - tests = append(tests, test) - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - input := strings.NewReader(tt.text) - output := &strings.Builder{} - status, err := EscapeControlReader(input, output) - result := output.String() - if err != nil { - t.Errorf("EscapeControlReader(): err = %v", err) - } - - if !reflect.DeepEqual(status, tt.status) { - t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status) - } - if result != tt.result { - t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result) - } - }) - } -} diff --git a/modules/charset/escape.go b/modules/charset/escape.go new file mode 100644 index 000000000000..f212c048244b --- /dev/null +++ b/modules/charset/escape.go @@ -0,0 +1,219 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "bytes" + "fmt" + "io" + "strings" + "unicode" + "unicode/utf8" + + "golang.org/x/text/unicode/bidi" +) + +// EscapeStatus represents the findings of the unicode escaper +type EscapeStatus struct { + Escaped bool + HasError bool + HasBadRunes bool + HasControls bool + HasSpaces bool + HasMarks bool + HasBIDI bool + BadBIDI bool + HasRTLScript bool + HasLTRScript bool +} + +// Or combines two EscapeStatus structs into one representing the conjunction of the two +func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { + status.Escaped = status.Escaped || other.Escaped + status.HasError = status.HasError || other.HasError + status.HasBadRunes = status.HasBadRunes || other.HasBadRunes + status.HasControls = status.HasControls || other.HasControls + status.HasSpaces = status.HasSpaces || other.HasSpaces + status.HasMarks = status.HasMarks || other.HasMarks + status.HasBIDI = status.HasBIDI || other.HasBIDI + status.BadBIDI = status.BadBIDI || other.BadBIDI + status.HasRTLScript = status.HasRTLScript || other.HasRTLScript + status.HasLTRScript = status.HasLTRScript || other.HasLTRScript + return status +} + +// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string +func EscapeControlString(text string) (EscapeStatus, string) { + sb := &strings.Builder{} + escaped, _ := EscapeControlReader(strings.NewReader(text), sb) + return escaped, sb.String() +} + +// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte +func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { + buf := &bytes.Buffer{} + escaped, _ := EscapeControlReader(bytes.NewReader(text), buf) + return escaped, buf.Bytes() +} + +// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error +func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { + buf := make([]byte, 4096) + readStart := 0 + var n int + var writePos int + + lineHasBIDI := false + lineHasRTLScript := false + lineHasLTRScript := false + +readingloop: + for err == nil { + n, err = text.Read(buf[readStart:]) + bs := buf[:n+readStart] + i := 0 + + for i < len(bs) { + r, size := utf8.DecodeRune(bs[i:]) + // Now handle the codepoints + switch { + case r == utf8.RuneError: + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + writePos = i + } + // runes can be at most 4 bytes - so... + if len(bs)-i <= 3 { + // if not request more data + copy(buf, bs[i:]) + readStart = n - i + writePos = 0 + continue readingloop + } + // this is a real broken rune + escaped.HasBadRunes = true + escaped.Escaped = true + if _, err = fmt.Fprintf(output, `<%X>`, bs[i:i+size]); err != nil { + escaped.HasError = true + return + } + writePos += size + case r == '\n': + if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { + escaped.BadBIDI = true + } + lineHasBIDI = false + lineHasRTLScript = false + lineHasLTRScript = false + + case r == '\r' || r == '\t' || r == ' ': + // These are acceptable control characters and space characters + case unicode.IsSpace(r): + escaped.HasSpaces = true + escaped.Escaped = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size + case unicode.Is(unicode.Bidi_Control, r): + escaped.Escaped = true + escaped.HasBIDI = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + lineHasBIDI = true + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size + case unicode.Is(unicode.C, r): + escaped.Escaped = true + escaped.HasControls = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size + case unicode.Is(unicode.M, r): + escaped.Escaped = true + escaped.HasMarks = true + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + escaped.HasError = true + return + } + writePos = i + size + default: + p, _ := bidi.Lookup(bs[i : i+size]) + c := p.Class() + if c == bidi.R || c == bidi.AL { + lineHasRTLScript = true + escaped.HasRTLScript = true + } else if c == bidi.L { + lineHasLTRScript = true + escaped.HasLTRScript = true + } + } + i += size + } + if n > 0 { + // we read something... + // write everything unwritten + if writePos < i { + if _, err = output.Write(bs[writePos:i]); err != nil { + escaped.HasError = true + return + } + } + + // reset the starting positions for the next read + readStart = 0 + writePos = 0 + } + } + if readStart > 0 { + // this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round + escaped.Escaped = true + escaped.HasBadRunes = true + if _, err = fmt.Fprintf(output, `<%X>`, buf[:readStart]); err != nil { + escaped.HasError = true + return + } + } + if err == io.EOF { + if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { + escaped.BadBIDI = true + } + err = nil + return + } + escaped.HasError = true + return +} diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go new file mode 100644 index 000000000000..dec92b499299 --- /dev/null +++ b/modules/charset/escape_test.go @@ -0,0 +1,202 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "reflect" + "strings" + "testing" +) + +type escapeControlTest struct { + name string + text string + status EscapeStatus + result string +} + +var escapeControlTests = []escapeControlTest{ + { + name: "", + }, + { + name: "single line western", + text: "single line western", + result: "single line western", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "multi line western", + text: "single line western\nmulti line western\n", + result: "single line western\nmulti line western\n", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "multi line western non-breaking space", + text: "single line western\nmulti line western\n", + result: `single line western` + "\n" + `multi line western` + "\n", + status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true}, + }, + { + name: "mixed scripts: western + japanese", + text: "日属秘ぞしちゅ。Then some western.", + result: "日属秘ぞしちゅ。Then some western.", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "japanese", + text: "日属秘ぞしちゅ。", + result: "日属秘ぞしちゅ。", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "hebrew", + text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", + result: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", + status: EscapeStatus{HasRTLScript: true}, + }, + { + name: "more hebrew", + text: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. + + המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. + + בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, + result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. + + המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. + + בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, + status: EscapeStatus{HasRTLScript: true}, + }, + { + name: "Mixed RTL+LTR", + text: `Many computer programs fail to display bidirectional text correctly. +For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), +then resh (ר), and finally heh (ה) (which should appear leftmost).`, + result: `Many computer programs fail to display bidirectional text correctly. +For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), +then resh (ר), and finally heh (ה) (which should appear leftmost).`, + status: EscapeStatus{ + HasRTLScript: true, + HasLTRScript: true, + }, + }, + { + name: "Mixed RTL+LTR+BIDI", + text: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, + result: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, + status: EscapeStatus{ + Escaped: true, + HasBIDI: true, + HasRTLScript: true, + HasLTRScript: true, + }, + }, + { + name: "Accented characters", + text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), + result: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "Program", + text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", + result: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", + status: EscapeStatus{HasLTRScript: true}, + }, + { + name: "CVE testcase", + text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", + result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, + status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true}, + }, + { + name: "Mixed testcase with fail", + text: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + + "\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n", + result: `Many computer programs fail to display bidirectional text correctly. + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + + "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", + status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, + }, +} + +func TestEscapeControlString(t *testing.T) { + for _, tt := range escapeControlTests { + t.Run(tt.name, func(t *testing.T) { + status, result := EscapeControlString(tt.text) + if !reflect.DeepEqual(status, tt.status) { + t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) + } + if result != tt.result { + t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result) + } + }) + } +} + +func TestEscapeControlBytes(t *testing.T) { + for _, tt := range escapeControlTests { + t.Run(tt.name, func(t *testing.T) { + status, result := EscapeControlBytes([]byte(tt.text)) + if !reflect.DeepEqual(status, tt.status) { + t.Errorf("EscapeControlBytes() status = %v, wanted= %v", status, tt.status) + } + if string(result) != tt.result { + t.Errorf("EscapeControlBytes()\nresult= %v,\nwanted= %v", result, tt.result) + } + }) + } +} + +func TestEscapeControlReader(t *testing.T) { + // lets add some control characters to the tests + tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) + copy(tests, escapeControlTests) + for _, test := range escapeControlTests { + test.name += " (+Control)" + test.text = "\u001E" + test.text + test.result = `` + "\u001e" + `` + test.result + test.status.Escaped = true + test.status.HasControls = true + tests = append(tests, test) + } + + for _, test := range escapeControlTests { + test.name += " (+Mark)" + test.text = "\u0300" + test.text + test.result = `` + "\u0300" + `` + test.result + test.status.Escaped = true + test.status.HasMarks = true + tests = append(tests, test) + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input := strings.NewReader(tt.text) + output := &strings.Builder{} + status, err := EscapeControlReader(input, output) + result := output.String() + if err != nil { + t.Errorf("EscapeControlReader(): err = %v", err) + } + + if !reflect.DeepEqual(status, tt.status) { + t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status) + } + if result != tt.result { + t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result) + } + }) + } +} From cb7d19d1dd332b7745b774c7cdea38617cb8c874 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 29 Nov 2021 19:42:24 +0000 Subject: [PATCH 22/30] extract out functions Signed-off-by: Andrew Thornton --- modules/charset/escape.go | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/modules/charset/escape.go b/modules/charset/escape.go index f212c048244b..a59ca67ed189 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -98,7 +98,7 @@ readingloop: // this is a real broken rune escaped.HasBadRunes = true escaped.Escaped = true - if _, err = fmt.Fprintf(output, `<%X>`, bs[i:i+size]); err != nil { + if err = writeBroken(output, bs[i:i+size]); err != nil { escaped.HasError = true return } @@ -122,7 +122,7 @@ readingloop: return } } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if err = writeEscaped(output, r); err != nil { escaped.HasError = true return } @@ -137,7 +137,7 @@ readingloop: } } lineHasBIDI = true - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if err = writeEscaped(output, r); err != nil { escaped.HasError = true return } @@ -151,7 +151,7 @@ readingloop: return } } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if err = writeEscaped(output, r); err != nil { escaped.HasError = true return } @@ -165,7 +165,7 @@ readingloop: return } } - if _, err = fmt.Fprintf(output, `%c`, r, r); err != nil { + if err = writeEscaped(output, r); err != nil { escaped.HasError = true return } @@ -202,7 +202,7 @@ readingloop: // this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round escaped.Escaped = true escaped.HasBadRunes = true - if _, err = fmt.Fprintf(output, `<%X>`, buf[:readStart]); err != nil { + if err = writeBroken(output, buf[:readStart]); err != nil { escaped.HasError = true return } @@ -217,3 +217,13 @@ readingloop: escaped.HasError = true return } + +func writeBroken(output io.Writer, bs []byte) (err error) { + _, err = fmt.Fprintf(output, `<%X>`, bs) + return +} + +func writeEscaped(output io.Writer, r rune) (err error) { + _, err = fmt.Fprintf(output, `%c`, r, r) + return +} From c55394d745513c67d831838040960bd0548549ad Mon Sep 17 00:00:00 2001 From: zeripath Date: Mon, 29 Nov 2021 20:22:10 +0000 Subject: [PATCH 23/30] Apply suggestions from code review --- web_src/js/features/repo-unicode-escape.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web_src/js/features/repo-unicode-escape.js b/web_src/js/features/repo-unicode-escape.js index 22620958eb2b..62d8878b8cd4 100644 --- a/web_src/js/features/repo-unicode-escape.js +++ b/web_src/js/features/repo-unicode-escape.js @@ -1,17 +1,17 @@ export function initEscapeButton() { - $(document, 'a.escape-button').on('click', (e) => { + $(document).on('click', 'a.escape-button', (e) => { e.preventDefault(); $(e.target).parents('.file-content, .non-diff-file-content').find('.file-code, .file-view').addClass('unicode-escaped'); $(e.target).hide(); $(e.target).siblings('a.unescape-button').show(); }); - $(document, 'a.unescape-button').on('click', (e) => { + $(document).on('click', 'a.unescape-button', (e) => { e.preventDefault(); $(e.target).parents('.file-content, .non-diff-file-content').find('.file-code, .file-view').removeClass('unicode-escaped'); $(e.target).hide(); $(e.target).siblings('a.escape-button').show(); }); - $(document, 'a.toggle-escape-button').on('click', (e) => { + $(document).on('click', 'a.toggle-escape-button', (e) => { e.preventDefault(); const fileContent = $(e.target).parents('.file-content, .non-diff-file-content'); const fileView = fileContent.find('.file-code, .file-view'); From c11bd340fa7568d9915f0bb00c369eeadab90c1b Mon Sep 17 00:00:00 2001 From: zeripath Date: Wed, 8 Dec 2021 19:47:47 +0000 Subject: [PATCH 24/30] Update options/locale/locale_en-US.ini Co-authored-by: Gwyneth Morgan --- options/locale/locale_en-US.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index dba49bea2cd3..a9c086bcb35c 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -988,9 +988,9 @@ file_view_rendered = View Rendered file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. -bidi_bad_header = `This file contains unexpected BiDi Unicode characters!` -bidi_bad_description = `This file contains unexpected BiDi Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` -bidi_bad_description_escaped = `This file contains unexpected BiDi Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` +bidi_bad_header = `This file contains unexpected Bidrectional Unicode characters!` +bidi_bad_description = `This file contains unexpected Bidrectional Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` +bidi_bad_description_escaped = `This file contains unexpected Bidrectional Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` unicode_header = `This file contains hidden Unicode characters!` unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` unicode_description_escaped = `This file contains hidden Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` From 58a4fcc190098898134986cc3c141e66b7635be4 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 8 Dec 2021 21:34:55 +0000 Subject: [PATCH 25/30] move warning triangle to another column Signed-off-by: Andrew Thornton --- templates/repo/blame.tmpl | 2 +- templates/repo/diff/section_split.tmpl | 22 +++++++++++++++++----- templates/repo/diff/section_unified.tmpl | 8 +++++--- templates/repo/view_file.tmpl | 2 +- web_src/js/features/repo-diff.js | 1 + web_src/less/_repository.less | 5 +++++ web_src/less/_review.less | 6 +++++- 7 files changed, 35 insertions(+), 11 deletions(-) diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index 7850b6ccd8ba..f099a0b8ece8 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -55,7 +55,7 @@ {{if $.EscapeStatus.Escaped}} -
    + {{end}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line}} - + + {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} + {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line}}{{end}} + {{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match}}{{end}} + + {{else}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line}} + + + + + + {{end}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line -}} + {{if eq .GetType 4}} @@ -39,13 +41,13 @@ */}}{{svg "octicon-plus"}}{{/* */}}{{/* */}}{{end}}{{/* - */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}} {{end}} {{if gt (len $line.Comments) 0}} - + diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 9b24271bb0f6..2152d7a703e5 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -129,7 +129,7 @@ {{if $.EscapeStatus.Escaped}} - + {{end}} diff --git a/web_src/js/features/repo-diff.js b/web_src/js/features/repo-diff.js index 6741f277fb61..179469c90cbe 100644 --- a/web_src/js/features/repo-diff.js +++ b/web_src/js/features/repo-diff.js @@ -126,6 +126,7 @@ export function initRepoDiffShowMore() { } $target.parent().replaceWith($(resp).find('#diff-file-boxes .diff-file-body .file-body').children()); + initRepoIssueContentHistory(); }).fail(() => { $target.removeClass('disabled'); }); diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index b5f46a2dcc28..3afa56de5725 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -3168,6 +3168,7 @@ td.blob-excerpt { .code-diff-unified .del-code, .code-diff-unified .del-code td, .code-diff-split .del-code .lines-num-old, +.code-diff-split .del-code .lines-escape-old, .code-diff-split .del-code .lines-type-marker-old, .code-diff-split .del-code .lines-code-old { background: var(--color-diff-removed-row-bg); @@ -3178,9 +3179,11 @@ td.blob-excerpt { .code-diff-unified .add-code td, .code-diff-split .add-code .lines-num-new, .code-diff-split .add-code .lines-type-marker-new, +.code-diff-split .add-code .lines-escape-new, .code-diff-split .add-code .lines-code-new, .code-diff-split .del-code .add-code.lines-num-new, .code-diff-split .del-code .add-code.lines-type-marker-new, +.code-diff-split .del-code .add-code.lines-escape-new, .code-diff-split .del-code .add-code.lines-code-new { background: var(--color-diff-added-row-bg); border-color: var(--color-diff-added-row-border); @@ -3189,7 +3192,9 @@ td.blob-excerpt { .code-diff-split .del-code .lines-num-new, .code-diff-split .del-code .lines-type-marker-new, .code-diff-split .del-code .lines-code-new, +.code-diff-split .del-code .lines-escape-new, .code-diff-split .add-code .lines-num-old, +.code-diff-split .add-code .lines-escape-old, .code-diff-split .add-code .lines-type-marker-old, .code-diff-split .add-code .lines-code-old { background: var(--color-diff-inactive); diff --git a/web_src/less/_review.less b/web_src/less/_review.less index 89cb9fe4bcf8..1070ad7ddedc 100644 --- a/web_src/less/_review.less +++ b/web_src/less/_review.less @@ -16,13 +16,17 @@ } } -.diff-file-box .lines-code .code-inner.has-escaped::before { +.lines-escape a.toggle-escape-button::before { visibility: visible; content: '⚠️'; font-family: var(--fonts-emoji); color: var(--color-red); } +.repository .diff-file-box .code-diff td.lines-escape { + padding-left: 0 !important; +} + .diff-file-box .lines-code:hover .ui.button.add-code-comment { opacity: 1; } From 0fc5af7bef2304867f2080f739623bb9d01c3f10 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Thu, 6 Jan 2022 02:44:53 +0100 Subject: [PATCH 26/30] linter ignore bool "suspicious assignment to a by-value method receiver" error --- .golangci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 235bb76715bd..d1daddf572a0 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -144,3 +144,7 @@ issues: - path: models/user/openid.go linters: - golint + # does not work with bool + - path: modules/charset/escape.go + linters: + - revive From 1dc8a21f011c3520c1f424d86cb2c77792877d80 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 6 Jan 2022 10:53:29 +0800 Subject: [PATCH 27/30] fix lint --- .golangci.yml | 4 ---- modules/charset/escape.go | 23 ++++++++++++----------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index d1daddf572a0..235bb76715bd 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -144,7 +144,3 @@ issues: - path: models/user/openid.go linters: - golint - # does not work with bool - - path: modules/charset/escape.go - linters: - - revive diff --git a/modules/charset/escape.go b/modules/charset/escape.go index a59ca67ed189..abe813b465a9 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -31,17 +31,18 @@ type EscapeStatus struct { // Or combines two EscapeStatus structs into one representing the conjunction of the two func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { - status.Escaped = status.Escaped || other.Escaped - status.HasError = status.HasError || other.HasError - status.HasBadRunes = status.HasBadRunes || other.HasBadRunes - status.HasControls = status.HasControls || other.HasControls - status.HasSpaces = status.HasSpaces || other.HasSpaces - status.HasMarks = status.HasMarks || other.HasMarks - status.HasBIDI = status.HasBIDI || other.HasBIDI - status.BadBIDI = status.BadBIDI || other.BadBIDI - status.HasRTLScript = status.HasRTLScript || other.HasRTLScript - status.HasLTRScript = status.HasLTRScript || other.HasLTRScript - return status + st := status + st.Escaped = st.Escaped || other.Escaped + st.HasError = st.HasError || other.HasError + st.HasBadRunes = st.HasBadRunes || other.HasBadRunes + st.HasControls = st.HasControls || other.HasControls + st.HasSpaces = st.HasSpaces || other.HasSpaces + st.HasMarks = st.HasMarks || other.HasMarks + st.HasBIDI = st.HasBIDI || other.HasBIDI + st.BadBIDI = st.BadBIDI || other.BadBIDI + st.HasRTLScript = st.HasRTLScript || other.HasRTLScript + st.HasLTRScript = st.HasLTRScript || other.HasLTRScript + return st } // EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string From 6f99bfd4600848e323e238a53f5ea26f5b4b82b2 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 6 Jan 2022 14:50:12 +0800 Subject: [PATCH 28/30] refactoring --- services/gitdiff/gitdiff.go | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 01e72ab3deba..6c6cce14146e 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -171,11 +171,7 @@ func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int // escape a line's content or return
    needed for copy/paste purposes func getLineContent(content string) DiffInline { if len(content) > 0 { - status, content := charset.EscapeControlString(content) - return DiffInline{ - EscapeStatus: status, - Content: template.HTML(html.EscapeString(content)), - } + return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content))) } return DiffInline{Content: "
    "} } @@ -487,8 +483,7 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT buf.Write(codeTagSuffix) } } - status, content := charset.EscapeControlString(buf.String()) - return DiffInline{EscapeStatus: status, Content: template.HTML(content)} + return DiffInlineWithUnicodeEscape(template.HTML(buf.String())) } // GetLine gets a specific line by type (add or del) and file line number @@ -546,6 +541,16 @@ type DiffInline struct { Content template.HTML } +func DiffInlineWithUnicodeEscape(s template.HTML) DiffInline { + status, content := charset.EscapeControlString(string(s)) + return DiffInline{EscapeStatus: status, Content: template.HTML(content)} +} + +func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { + status, content := charset.EscapeControlString(highlight.Code(fileName, language, code)) + return DiffInline{EscapeStatus: status, Content: template.HTML(content)} +} + // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { if setting.Git.DisableDiffHighlight { @@ -570,26 +575,22 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif case DiffLineAdd: compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) if compareDiffLine == nil { - status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) - return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) } diff1 = compareDiffLine.Content diff2 = diffLine.Content case DiffLineDel: compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) if compareDiffLine == nil { - status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) - return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) } diff1 = diffLine.Content diff2 = compareDiffLine.Content default: if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { - status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content[1:])) - return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) } - status, escaped := charset.EscapeControlString(highlight.Code(diffSection.FileName, language, diffLine.Content)) - return DiffInline{EscapeStatus: status, Content: template.HTML(escaped)} + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) } diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true) From ab6db78982636ff9467bfd40af5f7d7c778daf3b Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 6 Jan 2022 16:06:43 +0800 Subject: [PATCH 29/30] refactor --- options/locale/locale_en-US.ini | 10 ++-- services/gitdiff/gitdiff.go | 2 + templates/repo/settings/lfs_file.tmpl | 18 +------ templates/repo/unicode_escape_prompt.tmpl | 17 +++++++ templates/repo/view_file.tmpl | 18 +------ templates/repo/wiki/view.tmpl | 56 +++------------------- web_src/js/features/common-global.js | 20 +++++--- web_src/js/features/repo-legacy.js | 4 +- web_src/js/features/repo-unicode-escape.js | 2 +- web_src/less/_base.less | 2 + web_src/less/_repository.less | 17 ++++--- 11 files changed, 58 insertions(+), 108 deletions(-) create mode 100644 templates/repo/unicode_escape_prompt.tmpl diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 42c21f51b39d..eacd74e1a006 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1005,12 +1005,12 @@ file_view_rendered = View Rendered file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. -bidi_bad_header = `This file contains unexpected Bidrectional Unicode characters!` -bidi_bad_description = `This file contains unexpected Bidrectional Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` -bidi_bad_description_escaped = `This file contains unexpected Bidrectional Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` +bidi_bad_header = `This file contains unexpected Bidirectional Unicode characters!` +bidi_bad_description = `This file contains unexpected Bidirectional Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` +bidi_bad_description_escaped = `This file contains unexpected Bidirectional Unicode characters. Hidden unicode characters are escaped below. Use the Unescape button to show how they render.` unicode_header = `This file contains hidden Unicode characters!` -unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
    If your use case is intentional and legitimate, you can safely ignore this warning.
    Use the Escape button to reveal hidden characters.` -unicode_description_escaped = `This file contains hidden Unicode characters.
    Hidden unicode characters are escaped below.
    Use the Unescape button to show how they render.` +unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` +unicode_description_escaped = `This file contains hidden Unicode characters. Hidden unicode characters are escaped below. Use the Unescape button to show how they render.` line_unicode = `This line has hidden unicode characters` escape_control_characters = Escape diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6c6cce14146e..292c270b7e3e 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -541,11 +541,13 @@ type DiffInline struct { Content template.HTML } +// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped func DiffInlineWithUnicodeEscape(s template.HTML) DiffInline { status, content := charset.EscapeControlString(string(s)) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } +// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { status, content := charset.EscapeControlString(highlight.Code(fileName, language, code)) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} diff --git a/templates/repo/settings/lfs_file.tmpl b/templates/repo/settings/lfs_file.tmpl index 50a64685df56..a4d6b21f1c9e 100644 --- a/templates/repo/settings/lfs_file.tmpl +++ b/templates/repo/settings/lfs_file.tmpl @@ -16,23 +16,7 @@
    - {{if .EscapeStatus.BadBIDI}} -
    - {{svg "octicon-x" 16 "close inside"}} -
    - {{.i18n.Tr "repo.bidi_bad_header"}} -
    -

    {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}

    -
    - {{else if .EscapeStatus.Escaped}} -
    - {{svg "octicon-x" 16 "close inside"}} -
    - {{.i18n.Tr "repo.unicode_header"}} -
    -

    {{.i18n.Tr "repo.unicode_description" | Str2html}}

    -
    - {{end}} + {{template "repo/unicode_escape_prompt" dict "EscapeStatus" .EscapeStatus "root" $}}
    {{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} diff --git a/templates/repo/unicode_escape_prompt.tmpl b/templates/repo/unicode_escape_prompt.tmpl new file mode 100644 index 000000000000..d45df012e159 --- /dev/null +++ b/templates/repo/unicode_escape_prompt.tmpl @@ -0,0 +1,17 @@ +{{if .EscapeStatus.BadBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{$.root.i18n.Tr "repo.bidi_bad_header"}} +
    +

    {{$.root.i18n.Tr "repo.bidi_bad_description" | Str2html}}

    +
    +{{else if .EscapeStatus.Escaped}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{$.root.i18n.Tr "repo.unicode_header"}} +
    +

    {{$.root.i18n.Tr "repo.unicode_description" | Str2html}}

    +
    +{{end}} diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 10876a3c7dc3..d5308c154b4e 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -72,23 +72,7 @@
    - {{if .EscapeStatus.BadBIDI}} -
    - {{svg "octicon-x" 16 "close inside"}} -
    - {{.i18n.Tr "repo.bidi_bad_header"}} -
    -

    {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}

    -
    - {{else if .EscapeStatus.Escaped}} -
    - {{svg "octicon-x" 16 "close inside"}} -
    - {{.i18n.Tr "repo.unicode_header"}} -
    -

    {{.i18n.Tr "repo.unicode_description" | Str2html}}

    -
    - {{end}} + {{template "repo/unicode_escape_prompt" dict "EscapeStatus" .EscapeStatus "root" $}}
    {{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} diff --git a/templates/repo/wiki/view.tmpl b/templates/repo/wiki/view.tmpl index f8b07dfdefe8..db0ce148785f 100644 --- a/templates/repo/wiki/view.tmpl +++ b/templates/repo/wiki/view.tmpl @@ -65,72 +65,28 @@
    {{end}}
    -
    - {{if .EscapeStatus.BadBIDI}} -
    - {{svg "octicon-x" 16 "close inside"}} -
    - {{.i18n.Tr "repo.bidi_bad_header"}} -
    -

    {{.i18n.Tr "repo.bidi_bad_description" | Str2html}}

    -
    - {{else if .EscapeStatus.Escaped}} -
    - {{svg "octicon-x" 16 "close inside"}} -
    - {{.i18n.Tr "repo.unicode_header"}} -
    -

    {{.i18n.Tr "repo.unicode_description" | Str2html}}

    -
    - {{end}} +
    + {{template "repo/unicode_escape_prompt" dict "EscapeStatus" .EscapeStatus "root" $}} {{.content | Safe}}
    {{if .sidebarPresent}}
    -
    +
    {{end}}
    {{if .footerPresent}} -
    + {{end}} diff --git a/web_src/js/features/common-global.js b/web_src/js/features/common-global.js index 2384cff15896..bf9d21ac49b1 100644 --- a/web_src/js/features/common-global.js +++ b/web_src/js/features/common-global.js @@ -81,12 +81,6 @@ export function initGlobalCommon() { } // Semantic UI modules. - $('.message .close').on('click', function() { - $(this) - .closest('.message') - .hide(); - }); - $('.dropdown:not(.custom)').dropdown({ fullTextSearch: 'exact' }); @@ -303,8 +297,20 @@ export function initGlobalButtons() { }); $('.hide-panel.button').on('click', function (event) { - $($(this).data('panel')).hide(); + // a `.hide-panel.button` can hide a panel, by `data-panel="selector"` or `data-panel-closest="selector"` event.preventDefault(); + let sel = $(this).attr('data-panel'); + if (sel) { + $(sel).hide(); + return; + } + sel = $(this).attr('data-panel-closest'); + if (sel) { + $(this).closest(sel).hide(); + return; + } + // should never happen, otherwise there is a bug in code + alert('Nothing to hide'); }); $('.show-modal.button').on('click', function () { diff --git a/web_src/js/features/repo-legacy.js b/web_src/js/features/repo-legacy.js index fb53d18c234e..c364beada9a4 100644 --- a/web_src/js/features/repo-legacy.js +++ b/web_src/js/features/repo-legacy.js @@ -10,7 +10,7 @@ import { initRepoIssueWipToggle, initRepoPullRequestMerge, initRepoPullRequestUpdate, updateIssuesMeta, } from './repo-issue.js'; -import {initEscapeButton} from './repo-unicode-escape.js'; +import {initUnicodeEscapeButton} from './repo-unicode-escape.js'; import {svg} from '../svg.js'; import {htmlEscape} from 'escape-goat'; import {initRepoBranchTagDropdown} from '../components/RepoBranchTagDropdown.js'; @@ -535,7 +535,7 @@ export function initRepository() { }); } - initEscapeButton(); + initUnicodeEscapeButton(); } function initRepoIssueCommentEdit() { diff --git a/web_src/js/features/repo-unicode-escape.js b/web_src/js/features/repo-unicode-escape.js index 62d8878b8cd4..5791c23155aa 100644 --- a/web_src/js/features/repo-unicode-escape.js +++ b/web_src/js/features/repo-unicode-escape.js @@ -1,4 +1,4 @@ -export function initEscapeButton() { +export function initUnicodeEscapeButton() { $(document).on('click', 'a.escape-button', (e) => { e.preventDefault(); $(e.target).parents('.file-content, .non-diff-file-content').find('.file-code, .file-view').addClass('unicode-escaped'); diff --git a/web_src/less/_base.less b/web_src/less/_base.less index c19030cccf4c..1856cee7a000 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -1575,9 +1575,11 @@ a.ui.label:hover { } } +/* I am not sure why this is needed, if yes, we need a comment .lines-escape { width: 0; } +*/ .lines-code { background-color: var(--color-code-bg); diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index ae27175f807a..4894a0a2c92b 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -3038,18 +3038,17 @@ td.blob-excerpt { padding-left: 8px; } -.file-message { - margin-bottom: 0 !important; -} - -.file-message, -.wiki-message { - border-radius: 0 !important; +.ui.message.unicode-escape-prompt { + margin-bottom: 0; + border-radius: 0; display: flex; flex-direction: column; +} + +.wiki-content-sidebar .ui.message.unicode-escape-prompt, +.wiki-content-footer .ui.message.unicode-escape-prompt { p { - text-align: left; - align-self: center; + display: none; } } From 4e1b449035852e63ae958209d41636d765cb0caf Mon Sep 17 00:00:00 2001 From: zeripath Date: Thu, 6 Jan 2022 19:43:59 +0000 Subject: [PATCH 30/30] Apply suggestions from code review --- web_src/less/_base.less | 2 -- 1 file changed, 2 deletions(-) diff --git a/web_src/less/_base.less b/web_src/less/_base.less index 1856cee7a000..c19030cccf4c 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -1575,11 +1575,9 @@ a.ui.label:hover { } } -/* I am not sure why this is needed, if yes, we need a comment .lines-escape { width: 0; } -*/ .lines-code { background-color: var(--color-code-bg);
    {{if $row.EscapeStatus.Escaped}}⚠️{{end}} {{$row.Code}} {{if (index $.LineEscapeStatus $idx).Escaped}}⚠️{{end}} {{$code | Safe}} {{$.section.GetComputedInlineDiffFor $line}}{{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}{{$inlineDiff}}{{$inlineDiff.Content}} {{if $line.LeftIdx}}{{end}}{{if $line.LeftIdx}}{{$.section.GetComputedInlineDiffFor $line}}{{end}}{{/* + */}}{{if $line.LeftIdx}}{{/* + */}}{{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}} {{if $line.RightIdx}}{{end}}{{if $line.RightIdx}}{{$.section.GetComputedInlineDiffFor $line}}{{end}}{{/* + */}}{{if $line.RightIdx}}{{/* + */}}{{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}}
    {{$section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}} {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}}{{/* + */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* + */}}{{/* + */}}{{svg "octicon-plus"}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}}{{if $line.LeftIdx}}{{/* + */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}} {{if $match.RightIdx}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $match.RightIdx}}{{$section.GetComputedInlineDiffFor $match}}{{end}}{{/* + */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* + */}}{{/* + */}}{{svg "octicon-plus"}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}}{{if $match.RightIdx}}{{/* + */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $match}}{{$inlineDiff.Content}}{{/* + */}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}} {{if $line.LeftIdx}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}}{{/* + */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{/* + */}}{{/* + */}}{{svg "octicon-plus"}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}}{{if $line.LeftIdx}}{{/* + */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}} {{if $line.RightIdx}}{{end}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{svg "octicon-plus"}}{{end}}{{if $line.RightIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}}{{/* + */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{/* + */}}{{/* + */}}{{svg "octicon-plus"}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}}{{if $line.RightIdx}}{{/* + */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}}
    {{$section.GetComputedInlineDiffFor $line}}{{/* + */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}} + {{$line.Content}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{$section.GetComputedInlineDiffFor $line}}{{/* + */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* + */}}{{/* + */}}{{svg "octicon-plus"}}{{/* + */}}{{/* + */}}{{end}}{{/* + */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}
    {{if $row.EscapeStatus.Escaped}}⚠️{{end}}{{if $row.EscapeStatus.Escaped}}{{end}} {{$row.Code}} diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index 64ffddd999a9..754f7cec10e0 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -22,10 +22,14 @@ {{end}} {{$inlineDiff.Content}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{$inlineDiff.Content}}{{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -34,12 +38,13 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{$leftDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} {{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $match.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -48,13 +53,15 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $match.RightIdx}}{{/* - */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $match}}{{$inlineDiff.Content}}{{/* + */}}{{$rightDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.LeftIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{/* @@ -63,12 +70,13 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} {{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{/* @@ -77,7 +85,7 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.RightIdx}}{{/* - */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* @@ -89,6 +97,7 @@ {{if or (gt (len $line.Comments) 0) (gt (len $match.Comments) 0)}}
    {{if gt (len $line.Comments) 0}} @@ -103,6 +112,7 @@ {{end}} {{if eq $line.GetCommentSide "proposed"}} @@ -119,6 +129,7 @@ {{else if gt (len $line.Comments) 0}}
    {{if gt (len $line.Comments) 0}} @@ -128,6 +139,7 @@ {{end}} {{if eq $line.GetCommentSide "proposed"}} diff --git a/templates/repo/diff/section_unified.tmpl b/templates/repo/diff/section_unified.tmpl index 2d39088d7e5f..93f9af52b474 100644 --- a/templates/repo/diff/section_unified.tmpl +++ b/templates/repo/diff/section_unified.tmpl @@ -25,10 +25,12 @@ {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} {{/* - */}}{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}} {{$line.Content}}
    {{template "repo/diff/conversation" mergeinto $.root "comments" $line.Comments}}
    {{if (index $.LineEscapeStatus $idx).Escaped}}⚠️{{end}}{{if (index $.LineEscapeStatus $idx).Escaped}}{{end}}{{$code | Safe}}