Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

baggage: Fix invalid percent-encoded octet sequences #5528

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
32d8397
Fix issue when percent encoded octet sequences don't match the UTF-8 …
santileira Jun 19, 2024
7b621ca
Update CHANGELOG with the PR number
santileira Jun 19, 2024
4c9c61c
Cover cases when we have multiple percent encoded octet sequences
santileira Jun 19, 2024
b565fa2
Improve code comments
santileira Jun 19, 2024
45546c6
Improve code comments and link to the documentation
santileira Jun 19, 2024
3acb8a3
Update CHANGELOG.md
santileira Jun 20, 2024
edc4fa2
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jun 20, 2024
94dc3ef
Change i by i+size
santileira Jun 24, 2024
6408a5f
Merge branch 'fix/invalid-percent-encoded-octet-sequences' of github.…
santileira Jun 24, 2024
8bfa73e
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jun 24, 2024
b06525f
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jun 25, 2024
05e84a2
Addressed feedback
santileira Jul 2, 2024
41c59eb
Merge branch 'fix/invalid-percent-encoded-octet-sequences' of github.…
santileira Jul 2, 2024
a63b75f
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jul 2, 2024
f424cc0
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jul 3, 2024
356b6ac
Add case on BnechmarkParse
santileira Jul 4, 2024
d9dc0e7
Add new algorithm for replace function
santileira Jul 4, 2024
c29dd7b
Merge branch 'fix/invalid-percent-encoded-octet-sequences' of github.…
santileira Jul 4, 2024
9864776
WIP
santileira Jul 4, 2024
bff973d
Addressed feedback
santileira Jul 4, 2024
cf7d0ad
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jul 5, 2024
75f1f5d
Addressed feedback
santileira Jul 5, 2024
7a12e61
Update baggage/baggage.go
santileira Jul 5, 2024
3a31882
Fix lint errors
santileira Jul 5, 2024
9350106
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jul 5, 2024
6fc0577
Merge branch 'main' into fix/invalid-percent-encoded-octet-sequences
santileira Jul 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Fix stale timestamps reported by the last-value aggregation. (#5517)
- Indicate the `Exporter` in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` must be created by the `New` method. (#5521)
- Improved performance in all `{Bool,Int64,Float64,String}SliceValue` functions of `go.opentelemetry.io/attributes` by reducing the number of allocations. (#5549)
- Replace invalid percent-encoded octet sequences with replacement char in `go.opentelemetry.io/otel/baggage`. (#5528)

## [1.27.0/0.49.0/0.3.0] 2024-05-21

Expand Down
36 changes: 32 additions & 4 deletions baggage/baggage.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,19 +294,45 @@ func parseMember(member string) (Member, error) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key)
}

val := strings.TrimSpace(v)
if !validateValue(val) {
rawVal := strings.TrimSpace(v)
if !validateValue(rawVal) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, v)
}

// Decode a percent-encoded value.
value, err := url.PathUnescape(val)
unescapeVal, err := url.PathUnescape(rawVal)
if err != nil {
return newInvalidMember(), fmt.Errorf("%w: %w", errInvalidValue, err)
}

value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)
return Member{key: key, value: value, properties: props, hasData: true}, nil
}

// replaceInvalidUTF8Sequences replaces invalid UTF-8 sequences with '�'.
func replaceInvalidUTF8Sequences(cap int, unescapeVal string) string {
if utf8.ValidString(unescapeVal) {
return unescapeVal
}
// W3C baggage spec:
// https://github.com/w3c/baggage/blob/8c215efbeebd3fa4b1aceb937a747e56444f22f3/baggage/HTTP_HEADER_FORMAT.md?plain=1#L69

var b strings.Builder
b.Grow(cap)
for i := 0; i < len(unescapeVal); {
r, size := utf8.DecodeRuneInString(unescapeVal[i:])
if r == utf8.RuneError && size == 1 {
// Invalid UTF-8 sequence found, replace it with '�'
_, _ = b.WriteString("�")
} else {
_, _ = b.WriteRune(r)
}
i += size
}

return b.String()
}

// validate ensures m conforms to the W3C Baggage specification.
// A key must be an ASCII string, returning an error otherwise.
func (m Member) validate() error {
Expand Down Expand Up @@ -607,10 +633,12 @@ func parsePropertyInternal(s string) (p Property, ok bool) {
}

// Decode a percent-encoded value.
value, err := url.PathUnescape(s[valueStart:valueEnd])
rawVal := s[valueStart:valueEnd]
unescapeVal, err := url.PathUnescape(rawVal)
if err != nil {
return
}
value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)

ok = true
p.key = s[keyStart:keyEnd]
Expand Down
62 changes: 61 additions & 1 deletion baggage/baggage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"slices"
"strings"
"testing"
"unicode/utf8"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -469,6 +470,18 @@ func TestBaggageParse(t *testing.T) {
in: tooManyMembers,
err: errMemberNumber,
},
{
name: "percent-encoded octet sequences do not match the UTF-8 encoding scheme",
in: "k=aa%ffcc;p=d%fff",
want: baggage.List{
"k": {
Value: "aa�cc",
Properties: []baggage.Property{
{Key: "p", Value: "d�f", HasValue: true},
},
},
},
},
}

for _, tc := range testcases {
Expand All @@ -480,6 +493,53 @@ func TestBaggageParse(t *testing.T) {
}
}

func TestBaggageParseValue(t *testing.T) {
testcases := []struct {
name string
in string
valueWant string
valueWantSize int
}{
{
name: "percent encoded octet sequence matches UTF-8 encoding scheme",
in: "k=aa%26cc",
valueWant: "aa&cc",
valueWantSize: 5,
},
{
name: "percent encoded octet sequence doesn't match UTF-8 encoding scheme",
in: "k=aa%ffcc",
valueWant: "aa�cc",
valueWantSize: 7,
},
{
name: "multiple percent encoded octet sequences don't match UTF-8 encoding scheme",
in: "k=aa%ffcc%fedd%fa",
valueWant: "aa�cc�dd�",
valueWantSize: 15,
},
{
name: "raw value",
in: "k=aacc",
valueWant: "aacc",
valueWantSize: 4,
},
}

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
b, err := Parse(tc.in)
assert.Empty(t, err)

val := b.Members()[0].Value()

assert.EqualValues(t, val, tc.valueWant)
assert.Equal(t, len(val), tc.valueWantSize)
assert.True(t, utf8.ValidString(val))
})
}
}

func TestBaggageString(t *testing.T) {
testcases := []struct {
name string
Expand Down Expand Up @@ -979,7 +1039,7 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs()

for i := 0; i < b.N; i++ {
benchBaggage, _ = Parse(`userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value`)
benchBaggage, _ = Parse("userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value, invalidUtf8=pr%ffo%ffp%fcValue")
}
}

Expand Down