-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
83d2786
commit 7221a37
Showing
11 changed files
with
341 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package config | ||
|
||
// ValueLoader is implemented by anything that can hand back a value of type T
// on demand.
type ValueLoader[T any] interface {
	// Load returns the loader's value.
	Load() T
}

// SingleValueLoader wraps v in a ValueLoader whose Load method returns v on
// every call.
func SingleValueLoader[T any](v T) ValueLoader[T] {
	return &loader[T]{v: v}
}

// loader is the ValueLoader behind SingleValueLoader: it holds one value and
// returns it unchanged from Load.
type loader[T any] struct {
	v T
}

// Load returns the value the loader was created with.
func (ld *loader[T]) Load() T {
	value := ld.v
	return value
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package sanitize | ||
|
||
import ( | ||
"strings" | ||
"unicode" | ||
|
||
"golang.org/x/text/unicode/rangetable" | ||
) | ||
|
||
// invisibleRunes lists characters that are treated as invisible by this package. | ||
// unicode.IsPrint alone does not reject every invisible character, so the | ||
// additional ones are enumerated explicitly here. | ||
// The list was taken from https://invisible-characters.com/ | ||
var invisibleRunes = []rune{ | ||
'\u0000', // NULL | ||
'\u0009', // CHARACTER TABULATION | ||
'\u00A0', // NO-BREAK SPACE | ||
'\u00AD', // SOFT HYPHEN | ||
'\u034F', // COMBINING GRAPHEME JOINER | ||
'\u061C', // ARABIC LETTER MARK | ||
'\u115F', // HANGUL CHOSEONG FILLER | ||
'\u1160', // HANGUL JUNGSEONG FILLER | ||
'\u17B4', // KHMER VOWEL INHERENT AQ | ||
'\u17B5', // KHMER VOWEL INHERENT AA | ||
'\u180E', // MONGOLIAN VOWEL SEPARATOR | ||
'\u2000', // EN QUAD | ||
'\u2001', // EM QUAD | ||
'\u2002', // EN SPACE | ||
'\u2003', // EM SPACE | ||
'\u2004', // THREE-PER-EM SPACE | ||
'\u2005', // FOUR-PER-EM SPACE | ||
'\u2006', // SIX-PER-EM SPACE | ||
'\u2007', // FIGURE SPACE | ||
'\u2008', // PUNCTUATION SPACE | ||
'\u2009', // THIN SPACE | ||
'\u200A', // HAIR SPACE | ||
'\u200B', // ZERO WIDTH SPACE | ||
'\u200C', // ZERO WIDTH NON-JOINER | ||
'\u200D', // ZERO WIDTH JOINER | ||
'\u200E', // LEFT-TO-RIGHT MARK | ||
'\u200F', // RIGHT-TO-LEFT MARK | ||
'\u202F', // NARROW NO-BREAK SPACE | ||
'\u205F', // MEDIUM MATHEMATICAL SPACE | ||
'\u2060', // WORD JOINER | ||
'\u2061', // FUNCTION APPLICATION | ||
'\u2062', // INVISIBLE TIMES | ||
'\u2063', // INVISIBLE SEPARATOR | ||
'\u2064', // INVISIBLE PLUS | ||
'\u206A', // INHIBIT SYMMETRIC SWAPPING | ||
'\u206B', // ACTIVATE SYMMETRIC SWAPPING | ||
'\u206C', // INHIBIT ARABIC FORM SHAPING | ||
'\u206D', // ACTIVATE ARABIC FORM SHAPING | ||
'\u206E', // NATIONAL DIGIT SHAPES | ||
'\u206F', // NOMINAL DIGIT SHAPES | ||
'\u3000', // IDEOGRAPHIC SPACE | ||
'\u2800', // BRAILLE PATTERN BLANK | ||
'\u3164', // HANGUL FILLER | ||
'\uFEFF', // ZERO WIDTH NO-BREAK SPACE | ||
'\uFFA0', // HALFWIDTH HANGUL FILLER | ||
} | ||
|
||
var invisibleRangeTable *unicode.RangeTable | ||
|
||
func init() { | ||
invisibleRangeTable = rangetable.New(invisibleRunes...) | ||
} | ||
|
||
// Unicode removes irregularly invisible characters from a string. | ||
// | ||
// Irregularly invisible characters are defined as: | ||
// - Non-printable characters according to Go's unicode package (unicode.IsPrint). | ||
// - Characters in the invisibleRunes list (https://invisible-characters.com/). | ||
// | ||
// Note: Regular ASCII space (0x20) is not removed. | ||
func Unicode(str string) string { | ||
return strings.Map(func(r rune) rune { | ||
if unicode.Is(invisibleRangeTable, r) || !unicode.IsPrint(r) { | ||
return -1 | ||
} | ||
return r | ||
}, str) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package sanitize | ||
|
||
import ( | ||
"fmt" | ||
"testing" | ||
"unicode" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
var out string | ||
|
||
func BenchmarkMessageID(b *testing.B) { | ||
dirtyMessageID := "\u0000 Test foo_bar-baz \u034F 123-222 " | ||
properMessageID := "123e4567-e89b-12d3-a456-426614174000" | ||
|
||
b.Run("in-place for loop - dirty", func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
out = sanitizeMessageIDForLoop(dirtyMessageID) | ||
} | ||
}) | ||
|
||
b.Run("in-place for loop - proper", func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
out = sanitizeMessageIDForLoop(properMessageID) | ||
} | ||
}) | ||
|
||
b.Run("strings map - dirty", func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
out = Unicode(dirtyMessageID) | ||
} | ||
}) | ||
|
||
b.Run("strings map - proper", func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
out = Unicode(properMessageID) | ||
} | ||
}) | ||
} | ||
|
||
// sanitizeMessageIDForLoop is a deliberately incorrect sanitizer that exists | ||
// only as a baseline for BenchmarkMessageID; do not use it for real input. | ||
// | ||
// Known differences from Unicode: | ||
//   - a rune is removed only when it is BOTH non-printable and present in | ||
//     invisibleRangeTable (Unicode removes a rune when either holds); | ||
//   - the range expression is evaluated once, so i indexes the string as it | ||
//     was on entry while messageID shrinks, and offsets drift after the | ||
//     first removal (slicing can even go out of range); | ||
//   - exactly one byte is cut at i, regardless of the rune's encoded width. | ||
func sanitizeMessageIDForLoop(messageID string) string { | ||
for i, r := range messageID { | ||
// Printable runes are always kept. | ||
if unicode.IsPrint(r) { | ||
continue | ||
} | ||
// Non-printable runes outside the invisible list are kept as well. | ||
if !unicode.Is(invisibleRangeTable, r) { | ||
continue | ||
} | ||
|
||
// Cut a single byte at offset i out of the current string. | ||
messageID = messageID[:i] + messageID[i+1:] | ||
} | ||
return messageID | ||
} | ||
|
||
func TestSanitizeMessageID(t *testing.T) { | ||
testcases := []struct { | ||
in string | ||
out string | ||
}{ | ||
{"\u0000 Test \u0000foo_bar-baz 123-222 \u0000", " Test foo_bar-baz 123-222 "}, | ||
{"\u0000", ""}, | ||
{"\u0000 ", " "}, | ||
{"\u0000 \u0000", " "}, | ||
{"\u00A0\t\n\r\u034F", ""}, | ||
{"τυχαίο;", "τυχαίο;"}, | ||
} | ||
|
||
for _, tc := range testcases { | ||
cleanMessageID := Unicode(tc.in) | ||
require.Equal(t, tc.out, cleanMessageID, fmt.Sprintf("%#v -> %#v", tc.in, tc.out)) | ||
} | ||
|
||
for _, r := range invisibleRunes { | ||
cleanMessageID := Unicode(string(r)) | ||
require.Empty(t, cleanMessageID, fmt.Sprintf("%U", r)) | ||
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package stringify | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
) | ||
|
||
// Data renders data as a string.
//
//   - A nil interface value yields the empty string.
//   - A string is returned unchanged.
//   - Anything else is encoded with json.Marshal; if encoding fails, the
//     result of fmt.Sprint is returned instead.
func Data(data any) string {
	switch v := data.(type) {
	case nil:
		return ""
	case string:
		return v
	}

	encoded, err := json.Marshal(data)
	if err != nil {
		// Fall back to the default formatting rather than reporting an error.
		return fmt.Sprint(data)
	}
	return string(encoded)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package stringify_test | ||
|
||
import ( | ||
"errors" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/rudderlabs/rudder-go-kit/stringify" | ||
) | ||
|
||
// failOnJSONMarshal is a test fixture whose MarshalJSON method always fails.
type failOnJSONMarshal struct{}

// MarshalJSON never produces output; it always returns a
// "failed to marshal" error.
func (failOnJSONMarshal) MarshalJSON() ([]byte, error) {
	err := errors.New("failed to marshal")
	return nil, err
}
|
||
func TestStringyData(t *testing.T) { | ||
testCases := []struct { | ||
name string | ||
input any | ||
expected string | ||
}{ | ||
{ | ||
name: "Nil input", | ||
input: nil, | ||
expected: "", | ||
}, | ||
{ | ||
name: "String input", | ||
input: "test string", | ||
expected: "test string", | ||
}, | ||
{ | ||
name: "Struct input", | ||
input: struct { | ||
Name string `json:"name"` | ||
Age int `json:"age"` | ||
}{Name: "John", Age: 30}, | ||
expected: `{"name":"John","age":30}`, | ||
}, | ||
{ | ||
name: "Slice input", | ||
input: []string{ | ||
"apple", "banana", "cherry", | ||
}, | ||
expected: `["apple","banana","cherry"]`, | ||
}, | ||
{ | ||
name: "Fail on JSON marshal", | ||
input: failOnJSONMarshal{}, | ||
expected: "{}", | ||
}, | ||
} | ||
|
||
for _, tc := range testCases { | ||
t.Run(tc.name, func(t *testing.T) { | ||
result := stringify.Data(tc.input) | ||
require.Equal(t, tc.expected, result) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package uuid | ||
|
||
import ( | ||
"crypto/md5" | ||
|
||
"github.com/google/uuid" | ||
) | ||
|
||
// GetMD5UUID hashes the given string into md5 and returns it as uuid | ||
func GetMD5UUID(str string) (uuid.UUID, error) { | ||
// To maintain backward compatibility, we are using md5 hash of the string | ||
// We are mimicking github.com/gofrs/uuid behavior: | ||
// | ||
// md5Sum := md5.Sum([]byte(str)) | ||
// u, err := uuid.FromBytes(md5Sum[:]) | ||
|
||
// u.SetVersion(uuid.V4) | ||
// u.SetVariant(uuid.VariantRFC4122) | ||
|
||
// google/uuid doesn't allow us to modify the version and variant, | ||
// so we are doing it manually, using gofrs/uuid library implementation. | ||
md5Sum := md5.Sum([]byte(str)) // skipcq: GO-S1023 | ||
// SetVariant: VariantRFC4122 | ||
md5Sum[8] = md5Sum[8]&(0xff>>2) | (0x02 << 6) | ||
// SetVersion: Version 4 | ||
version := byte(4) | ||
md5Sum[6] = (md5Sum[6] & 0x0f) | (version << 4) | ||
|
||
return uuid.FromBytes(md5Sum[:]) | ||
} |
Oops, something went wrong.