Skip to content

Commit

Permalink
[winlogbeat] Clean up rune handling (#32198)
Browse files Browse the repository at this point in the history
* Take into account RuneErrors when decoding XML codes

* Add more unit tests

* Parse multibyte utf8 encodings at the end of backing array
  • Loading branch information
marc-gr authored and chrisberkhout committed Jun 1, 2023
1 parent 3170df9 commit 76b958e
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 2 deletions.
14 changes: 12 additions & 2 deletions libbeat/common/encoding/xml/safe_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,22 @@ func (r *SafeReader) Read(d []byte) (n int, err error) {
return output(n)
}
if len(r.buf) == 0 {
n, _ = r.inner.Read(r.backing[:])
n, _ := r.inner.Read(r.backing[:])
r.buf = r.backing[:n]
}
for i := 0; i < len(r.buf); {
if !utf8.FullRune(r.buf[i:]) {
n = copy(d, r.buf[:i])
rn := copy(r.backing[:], r.buf[i:])
nn, err := r.inner.Read(r.backing[rn:])
if err != nil && err != io.EOF {
return 0, err
}
r.buf = r.backing[:rn+nn]
return output(n)
}
code, size := utf8.DecodeRune(r.buf[i:])
if !unicode.IsSpace(code) && unicode.IsControl(code) {
if code == utf8.RuneError || (!unicode.IsSpace(code) && unicode.IsControl(code)) {
n = copy(d, r.buf[:i])
r.buf = r.buf[n+1:]
r.code = []byte(fmt.Sprintf("\\u%04x", code))
Expand Down
139 changes: 139 additions & 0 deletions libbeat/common/encoding/xml/safe_reader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//go:build !integration
// +build !integration

package xml

import (
"encoding/xml"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

func TestInvalidXMLIsSanitized(t *testing.T) {
tests := []struct {
name string
input []byte
want map[string]interface{}
err error
}{
{
name: "control space",
input: []byte(`<person><Name ID="123">John` + "\t" + `
</Name></person>`),
want: map[string]interface{}{
"person": map[string]interface{}{
"Name": map[string]interface{}{
"#text": "John",
"ID": "123",
},
},
},
},
{
name: "crlf",
input: []byte(`<person><Name ID="123">John` + "\r\n" + `</Name></person>`),
want: map[string]interface{}{
"person": map[string]interface{}{
"Name": map[string]interface{}{
"#text": "John",
"ID": "123",
},
},
},
},
{
name: "single invalid",
input: []byte(`<person><Name ID="123">John` + "\x80" + `</Name></person>`),
want: map[string]interface{}{
"person": map[string]interface{}{
"Name": map[string]interface{}{
"#text": "John\\ufffd",
"ID": "123",
},
},
},
},
{
name: "double invalid",
input: []byte(`<person><Name ID="123">` + "\x80" + `John` + "\x80" + `</Name></person>`),
want: map[string]interface{}{
"person": map[string]interface{}{
"Name": map[string]interface{}{
"#text": "\\ufffdJohn\\ufffd",
"ID": "123",
},
},
},
},
{
name: "happy single invalid",
input: []byte(`<person><Name ID="123">😊John` + "\x80" + `</Name></person>`),
want: map[string]interface{}{
"person": map[string]interface{}{
"Name": map[string]interface{}{
"#text": "😊John\\ufffd",
"ID": "123",
},
},
},
},
{
name: "invalid tag",
input: []byte(`<person><Name ID` + "\x80" + `="123">John</Name></person>`),
want: nil,
err: &xml.SyntaxError{Msg: "attribute name without = in element", Line: 1},
},
{
name: "invalid tag value",
input: []byte(`<person><Name ID="` + "\x80" + `123">John</Name></person>`),
want: map[string]interface{}{
"person": map[string]interface{}{
"Name": map[string]interface{}{
"#text": "John",
"ID": "\\ufffd123",
},
},
},
},
{
name: "unhappy",
input: []byte(`<person><Name ID="123">John is` + strings.Repeat(" ", 223) + ` 😞</Name></person>`),
want: map[string]interface{}{
"person": map[string]interface{}{
"Name": map[string]interface{}{
"#text": "John is" + strings.Repeat(" ", 223) + " 😞",
"ID": "123",
},
},
},
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
d := NewDecoder(NewSafeReader(test.input))
out, err := d.Decode()
assert.Equal(t, test.err, err)
assert.Equal(t, test.want, out)
})
}
}

0 comments on commit 76b958e

Please sign in to comment.