Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to disable ambiguous unicode characters detection #28454

Merged
merged 4 commits into from
Dec 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions custom/conf/app.example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,9 @@ LEVEL = Info
;; Max size of files to be displayed (default is 8MiB)
;MAX_DISPLAY_FILE_SIZE = 8388608
;;
;; Detect ambiguous unicode characters in file contents and show warnings on the UI
;AMBIGUOUS_UNICODE_DETECTION = true
;;
;; Whether the email of the user should be shown in the Explore Users page
;SHOW_USER_EMAIL = true
;;
Expand Down
1 change: 1 addition & 0 deletions docs/content/administration/config-cheat-sheet.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a
- `THEMES`: **gitea-auto,gitea-light,gitea-dark**: All available themes. Allows users to select personalized themes
regardless of the value of `DEFAULT_THEME`.
- `MAX_DISPLAY_FILE_SIZE`: **8388608**: Max size of files to be displayed (default is 8MiB)
- `AMBIGUOUS_UNICODE_DETECTION`: **true**: Detect ambiguous unicode characters in file contents and show warnings on the UI
- `REACTIONS`: All available reactions users can choose on issues/prs and comments
Values can be emoji alias (:smile:) or a unicode emoji.
For custom reactions, add a tightly cropped square image to public/assets/img/emoji/reaction_name.png
Expand Down
59 changes: 10 additions & 49 deletions modules/charset/escape.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,31 @@
package charset

import (
"bufio"
"html/template"
"io"
"strings"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
)

// RuneNBSP is the codepoint for NBSP
const RuneNBSP = 0xa0

// EscapeControlHTML escapes the unicode control sequences in a provided html document
func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
sb := &strings.Builder{}
outputStream := &HTMLStreamerWriter{Writer: sb}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)

if err := StreamHTML(strings.NewReader(text), streamer); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
}
return streamer.escaped, sb.String()
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
return escaped, template.HTML(sb.String())
}

// EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
if !setting.UI.AmbiguousUnicodeDetection {
_, err = io.Copy(writer, reader)
return &EscapeStatus{}, err
}
outputStream := &HTMLStreamerWriter{Writer: writer}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)

Expand All @@ -43,41 +42,3 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.
}
return streamer.escaped, err
}

// EscapeControlStringReader reads plain string content from reader one
// newline-terminated chunk at a time, passes each chunk through an escape
// streamer configured with locale and the allowed runes, and emits the result
// to writer. It returns the EscapeStatus collected by the streamer together
// with the first error hit while escaping or reading; io.EOF is treated as a
// normal end of input and is not reported. HTML line breaks are not inserted
// after every newline by this method.
func EscapeControlStringReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
	es := NewEscapeStreamer(locale, &HTMLStreamerWriter{Writer: writer}, allowed...).(*escapeStreamer)
	lines := bufio.NewReader(reader)

	for {
		chunk, readErr := lines.ReadString('\n')
		// ReadString may hand back data together with an error (e.g. a final
		// line without a trailing newline), so process the data first.
		if chunk != "" {
			if textErr := es.Text(chunk); textErr != nil {
				es.escaped.HasError = true
				log.Error("Error whilst escaping: %v", textErr)
				return es.escaped, textErr
			}
		}

		switch readErr {
		case nil:
			continue
		case io.EOF:
			// End of input is the normal way out of the loop.
			return es.escaped, nil
		default:
			return es.escaped, readErr
		}
	}
}

// EscapeControlString passes text through an escape streamer configured with
// locale and the allowed runes. It returns the EscapeStatus collected by the
// streamer and the escaped output as a string. A streamer failure is logged
// and recorded in the status (HasError) rather than returned.
func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
	var out strings.Builder
	es := NewEscapeStreamer(locale, &HTMLStreamerWriter{Writer: &out}, allowed...).(*escapeStreamer)

	err := es.Text(text)
	if err != nil {
		es.escaped.HasError = true
		log.Error("Error whilst escaping: %v", err)
	}
	// Whatever was written before a failure is still returned.
	return es.escaped, out.String()
}
2 changes: 1 addition & 1 deletion modules/charset/escape_stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func (e *escapeStreamer) Text(data string) error {
until, next = nextIdxs[0]+pos, nextIdxs[1]+pos
}

// from pos until until we know that the runes are not \r\t\n or even ' '
// from pos until we know that the runes are not \r\t\n or even ' '
runes := make([]rune, 0, next-until)
positions := make([]int, 0, next-until+1)

Expand Down
52 changes: 16 additions & 36 deletions modules/charset/escape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
package charset

import (
"reflect"
"strings"
"testing"

"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/modules/translation"

"github.com/stretchr/testify/assert"
)

type escapeControlTest struct {
Expand Down Expand Up @@ -132,22 +135,8 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
},
}

// TestEscapeControlString runs every case in escapeControlTests through
// EscapeControlString as a named subtest and checks both the reported status
// and the escaped output against the expectations stored in the case.
func TestEscapeControlString(t *testing.T) {
	for i := range escapeControlTests {
		tc := escapeControlTests[i]
		t.Run(tc.name, func(t *testing.T) {
			gotStatus, gotResult := EscapeControlString(tc.text, &translation.MockLocale{})

			if statusMatches := reflect.DeepEqual(*gotStatus, tc.status); !statusMatches {
				t.Errorf("EscapeControlString() status = %v, wanted= %v", gotStatus, tc.status)
			}
			if gotResult != tc.result {
				t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", gotResult, tc.result)
			}
		})
	}
}

func TestEscapeControlReader(t *testing.T) {
// lets add some control characters to the tests
// add some control characters to the tests
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
copy(tests, escapeControlTests)

Expand All @@ -169,29 +158,20 @@ func TestEscapeControlReader(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
input := strings.NewReader(tt.text)
output := &strings.Builder{}
status, err := EscapeControlReader(input, output, &translation.MockLocale{})
result := output.String()
if err != nil {
t.Errorf("EscapeControlReader(): err = %v", err)
}

if !reflect.DeepEqual(*status, tt.status) {
t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status)
}
if result != tt.result {
t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result)
}
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{})
assert.NoError(t, err)
assert.Equal(t, tt.status, *status)
assert.Equal(t, tt.result, output.String())
})
}
}

func TestEscapeControlReader_panic(t *testing.T) {
bs := make([]byte, 0, 20479)
bs = append(bs, 'A')
for i := 0; i < 6826; i++ {
bs = append(bs, []byte("—")...)
}
_, _ = EscapeControlString(string(bs), &translation.MockLocale{})
func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
_, out := EscapeControlHTML("a test", &translation.MockLocale{})
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
setting.UI.AmbiguousUnicodeDetection = false
_, out = EscapeControlHTML("a test", &translation.MockLocale{})
assert.EqualValues(t, `a test`, out)
}
11 changes: 3 additions & 8 deletions modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"os/exec"
"strings"
"time"
"unsafe"

"code.gitea.io/gitea/modules/git/internal" //nolint:depguard // only this file can use the internal type CmdArg, other files and packages should use AddXxx functions
"code.gitea.io/gitea/modules/log"
Expand Down Expand Up @@ -389,15 +388,11 @@ func (r *runStdError) IsExitCode(code int) bool {
return false
}

// bytesToString returns a string that shares its backing memory with b,
// avoiding the copy a plain string(b) conversion would make. A nil or empty
// slice yields the empty string.
//
// Because the result aliases b, the caller must not modify b for as long as
// the returned string is in use.
func bytesToString(b []byte) string {
	// unsafe.String/unsafe.SliceData is the supported way to do this (and what
	// strings.Builder.String() uses); it does not depend on the slice and
	// string headers sharing a memory layout like a raw pointer cast does.
	return unsafe.String(unsafe.SliceData(b), len(b))
}

// RunStdString runs the command with options and returns stdout/stderr as strings, and stores stderr in the returned error (err combined with stderr).
func (c *Command) RunStdString(opts *RunOpts) (stdout, stderr string, runErr RunStdError) {
stdoutBytes, stderrBytes, err := c.RunStdBytes(opts)
stdout = bytesToString(stdoutBytes)
stderr = bytesToString(stderrBytes)
stdout = util.UnsafeBytesToString(stdoutBytes)
stderr = util.UnsafeBytesToString(stderrBytes)
if err != nil {
return stdout, stderr, &runStdError{err: err, stderr: stderr}
}
Expand Down Expand Up @@ -432,7 +427,7 @@ func (c *Command) RunStdBytes(opts *RunOpts) (stdout, stderr []byte, runErr RunS
err := c.Run(newOpts)
stderr = stderrBuf.Bytes()
if err != nil {
return nil, stderr, &runStdError{err: err, stderr: bytesToString(stderr)}
return nil, stderr, &runStdError{err: err, stderr: util.UnsafeBytesToString(stderr)}
}
// even if there is no err, there could still be some stderr output
return stdoutBuf.Bytes(), stderr, nil
Expand Down
29 changes: 14 additions & 15 deletions modules/highlight/highlight.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"bytes"
"fmt"
gohtml "html"
"html/template"
"io"
"path/filepath"
"strings"
Expand Down Expand Up @@ -55,7 +56,7 @@ func NewContext() {
}

// Code returns an HTML version of code string with chroma syntax highlighting classes and the matched lexer name
func Code(fileName, language, code string) (string, string) {
func Code(fileName, language, code string) (output template.HTML, lexerName string) {
NewContext()

// diff view newline will be passed as empty, change to literal '\n' so it can be copied
Expand All @@ -65,7 +66,7 @@ func Code(fileName, language, code string) (string, string) {
}

if len(code) > sizeLimit {
return code, ""
return template.HTML(template.HTMLEscapeString(code)), ""
}

var lexer chroma.Lexer
Expand Down Expand Up @@ -102,13 +103,11 @@ func Code(fileName, language, code string) (string, string) {
cache.Add(fileName, lexer)
}

lexerName := formatLexerName(lexer.Config().Name)

return CodeFromLexer(lexer, code), lexerName
return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
}

// CodeFromLexer returns an HTML version of code string with chroma syntax highlighting classes
func CodeFromLexer(lexer chroma.Lexer, code string) string {
func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
formatter := html.New(html.WithClasses(true),
html.WithLineNumbers(false),
html.PreventSurroundingPre(true),
Expand All @@ -120,23 +119,23 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
iterator, err := lexer.Tokenise(nil, code)
if err != nil {
log.Error("Can't tokenize code: %v", err)
return code
return template.HTML(template.HTMLEscapeString(code))
}
// style not used for live site but need to pass something
err = formatter.Format(htmlw, githubStyles, iterator)
if err != nil {
log.Error("Can't format code: %v", err)
return code
return template.HTML(template.HTMLEscapeString(code))
}

_ = htmlw.Flush()
// Chroma will add newlines for certain lexers in order to highlight them properly
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
return strings.TrimSuffix(htmlbuf.String(), "\n")
return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
}

// File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
func File(fileName, language string, code []byte) ([]string, string, error) {
func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
NewContext()

if len(code) > sizeLimit {
Expand Down Expand Up @@ -183,24 +182,24 @@ func File(fileName, language string, code []byte) ([]string, string, error) {
tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
htmlBuf := &bytes.Buffer{}

lines := make([]string, 0, len(tokensLines))
lines := make([]template.HTML, 0, len(tokensLines))
for _, tokens := range tokensLines {
iterator = chroma.Literator(tokens...)
err = formatter.Format(htmlBuf, githubStyles, iterator)
if err != nil {
return nil, "", fmt.Errorf("can't format code: %w", err)
}
lines = append(lines, htmlBuf.String())
lines = append(lines, template.HTML(htmlBuf.String()))
htmlBuf.Reset()
}

return lines, lexerName, nil
}

// PlainText returns non-highlighted HTML for code
func PlainText(code []byte) []string {
func PlainText(code []byte) []template.HTML {
r := bufio.NewReader(bytes.NewReader(code))
m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
for {
content, err := r.ReadString('\n')
if err != nil && err != io.EOF {
Expand All @@ -210,7 +209,7 @@ func PlainText(code []byte) []string {
if content == "" && err == io.EOF {
break
}
s := gohtml.EscapeString(content)
s := template.HTML(gohtml.EscapeString(content))
m = append(m, s)
}
return m
Expand Down
Loading