Skip to content

Commit

Permalink
feat: addition of some helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
achettyiitr committed Apr 8, 2024
1 parent 83d2786 commit 7221a37
Show file tree
Hide file tree
Showing 11 changed files with 341 additions and 4 deletions.
5 changes: 4 additions & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ linters-settings:
main:
files:
- $all
- "!**/uuid_test.go"
deny:
- pkg: "github.com/gofrs/uuid"
desc: 'use github.com/google/uuid instead'
desc: 'use github.com/google/uuid instead'
- pkg: "golang.org/x/exp/slices"
desc: 'use "slices" instead'
2 changes: 1 addition & 1 deletion config/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ import (
"fmt"
"os"
"reflect"
"slices"
"strings"
"time"

"github.com/fsnotify/fsnotify"
"github.com/joho/godotenv"
"github.com/spf13/viper"
"golang.org/x/exp/slices"
)

func (c *Config) load() {
Expand Down
21 changes: 21 additions & 0 deletions config/valueloader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package config

// SingleValueLoader wraps v in a ValueLoader whose Load method yields v on
// every call.
func SingleValueLoader[T any](v T) ValueLoader[T] {
	fixed := loader[T]{v: v}
	return &fixed
}

// ValueLoader is an interface that can be used to load a value of type T.
type ValueLoader[T any] interface {
// Load returns the loader's value.
Load() T
}

// loader is the ValueLoader implementation that holds one value and hands
// that same value back on each Load.
type loader[T any] struct {
	v T // value returned by Load
}

// Load returns the value stored in the loader.
func (fixed *loader[T]) Load() T {
	return fixed.v
}
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/fsnotify/fsnotify v1.7.0
github.com/go-chi/chi/v5 v5.0.12
github.com/go-redis/redis/v8 v8.11.5
github.com/gofrs/uuid v4.4.0+incompatible
github.com/golang/mock v1.6.0
github.com/google/uuid v1.6.0
github.com/joho/godotenv v1.5.1
Expand Down Expand Up @@ -47,9 +48,9 @@ require (
go.uber.org/goleak v1.3.0
go.uber.org/zap v1.27.0
golang.org/x/crypto v0.21.0
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a
golang.org/x/oauth2 v0.18.0
golang.org/x/sync v0.6.0
golang.org/x/text v0.14.0
google.golang.org/api v0.172.0
google.golang.org/protobuf v1.33.0
gopkg.in/alexcesaro/statsd.v2 v2.0.0
Expand Down Expand Up @@ -142,10 +143,10 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 // indirect
go.opentelemetry.io/proto/otlp v1.1.0 // indirect
go.uber.org/multierr v1.10.0 // indirect
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/net v0.22.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.17.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC
github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo=
github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE=
github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/gofrs/uuid v4.4.0+incompatible h1:3qXRTX8/NbyulANqlc0lchS1gqAVxRgsuW1YrTJupqA=
github.com/gofrs/uuid v4.4.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
Expand Down
80 changes: 80 additions & 0 deletions sanitize/sanitize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package sanitize

import (
"strings"
"unicode"

"golang.org/x/text/unicode/rangetable"
)

// invisibleRunes lists the code points that are loaded into
// invisibleRangeTable and stripped by Unicode. unicode.IsPrint does not
// reject every invisible character, so this list was taken from
// https://invisible-characters.com/.
var invisibleRunes = []rune{
'\u0000', // NULL
'\u0009', // CHARACTER TABULATION
'\u00A0', // NO-BREAK SPACE
'\u00AD', // SOFT HYPHEN
'\u034F', // COMBINING GRAPHEME JOINER
'\u061C', // ARABIC LETTER MARK
'\u115F', // HANGUL CHOSEONG FILLER
'\u1160', // HANGUL JUNGSEONG FILLER
'\u17B4', // KHMER VOWEL INHERENT AQ
'\u17B5', // KHMER VOWEL INHERENT AA
'\u180E', // MONGOLIAN VOWEL SEPARATOR
'\u2000', // EN QUAD
'\u2001', // EM QUAD
'\u2002', // EN SPACE
'\u2003', // EM SPACE
'\u2004', // THREE-PER-EM SPACE
'\u2005', // FOUR-PER-EM SPACE
'\u2006', // SIX-PER-EM SPACE
'\u2007', // FIGURE SPACE
'\u2008', // PUNCTUATION SPACE
'\u2009', // THIN SPACE
'\u200A', // HAIR SPACE
'\u200B', // ZERO WIDTH SPACE
'\u200C', // ZERO WIDTH NON-JOINER
'\u200D', // ZERO WIDTH JOINER
'\u200E', // LEFT-TO-RIGHT MARK
'\u200F', // RIGHT-TO-LEFT MARK
'\u202F', // NARROW NO-BREAK SPACE
'\u205F', // MEDIUM MATHEMATICAL SPACE
'\u2060', // WORD JOINER
'\u2061', // FUNCTION APPLICATION
'\u2062', // INVISIBLE TIMES
'\u2063', // INVISIBLE SEPARATOR
'\u2064', // INVISIBLE PLUS
'\u206A', // INHIBIT SYMMETRIC SWAPPING
'\u206B', // ACTIVATE SYMMETRIC SWAPPING
'\u206C', // INHIBIT ARABIC FORM SHAPING
'\u206D', // ACTIVATE ARABIC FORM SHAPING
'\u206E', // NATIONAL DIGIT SHAPES
'\u206F', // NOMINAL DIGIT SHAPES
'\u3000', // IDEOGRAPHIC SPACE
'\u2800', // BRAILLE PATTERN BLANK
'\u3164', // HANGUL FILLER
'\uFEFF', // ZERO WIDTH NO-BREAK SPACE
'\uFFA0', // HALFWIDTH HANGUL FILLER
}

var invisibleRangeTable *unicode.RangeTable

func init() {
invisibleRangeTable = rangetable.New(invisibleRunes...)
}

// Unicode returns str with every irregularly invisible character dropped.
//
// A rune is dropped when either of the following holds:
//   - It appears in the invisibleRunes list (https://invisible-characters.com/).
//   - Go's unicode package reports it as non-printable (unicode.IsPrint).
//
// Note: Regular ASCII space (0x20) is not removed.
func Unicode(str string) string {
	dropInvisible := func(r rune) rune {
		switch {
		case unicode.Is(invisibleRangeTable, r), !unicode.IsPrint(r):
			// A negative result tells strings.Map to omit the rune.
			return -1
		default:
			return r
		}
	}
	return strings.Map(dropInvisible, str)
}
80 changes: 80 additions & 0 deletions sanitize/sanitize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package sanitize

import (
"fmt"
"testing"
"unicode"

"github.com/stretchr/testify/require"
)

// out is a package-level sink: every sub-benchmark below assigns its result
// here (presumably so the compiler cannot discard the calls being measured).
var out string

// BenchmarkMessageID compares sanitizeMessageIDForLoop ("in-place for loop")
// with Unicode ("strings map") on two inputs: a "dirty" string containing
// \u0000 and \u034F, and a "proper" UUID-shaped string.
func BenchmarkMessageID(b *testing.B) {
dirtyMessageID := "\u0000 Test foo_bar-baz \u034F 123-222 "
properMessageID := "123e4567-e89b-12d3-a456-426614174000"

b.Run("in-place for loop - dirty", func(b *testing.B) {
for i := 0; i < b.N; i++ {
out = sanitizeMessageIDForLoop(dirtyMessageID)
}
})

b.Run("in-place for loop - proper", func(b *testing.B) {
for i := 0; i < b.N; i++ {
out = sanitizeMessageIDForLoop(properMessageID)
}
})

b.Run("strings map - dirty", func(b *testing.B) {
for i := 0; i < b.N; i++ {
out = Unicode(dirtyMessageID)
}
})

b.Run("strings map - proper", func(b *testing.B) {
for i := 0; i < b.N; i++ {
out = Unicode(properMessageID)
}
})
}

// sanitizeMessageIDForLoop is a deliberately incorrect implementation of
// message ID sanitization, kept only as a baseline for BenchmarkMessageID.
// Do not use it outside of benchmarking.
//
// Ways in which it differs from Unicode, as visible in the body:
//   - A rune is removed only when it is both non-printable and present in
//     invisibleRangeTable; Unicode removes a rune when either condition holds.
//   - Exactly one byte is cut at the rune's offset, which is a whole rune only
//     for single-byte runes.
//   - The offsets come from ranging over the original string while messageID
//     shrinks, so after the first removal they no longer line up, and the
//     slice expressions can panic with an out-of-range index (for example on
//     three consecutive NUL characters).
func sanitizeMessageIDForLoop(messageID string) string {
for i, r := range messageID {
// Printable runes are always kept.
if unicode.IsPrint(r) {
continue
}
// Non-printable runes that are not in the table are kept as well.
if !unicode.Is(invisibleRangeTable, r) {
continue
}

// Cut the single byte at offset i out of the current (possibly already shortened) string.
messageID = messageID[:i] + messageID[i+1:]
}
return messageID
}

// TestSanitizeMessageID checks Unicode against a fixed table of inputs and
// then verifies that each entry of invisibleRunes, taken on its own, is
// sanitized to the empty string.
func TestSanitizeMessageID(t *testing.T) {
	type sanitizeCase struct {
		in  string
		out string
	}
	cases := []sanitizeCase{
		{in: "\u0000 Test \u0000foo_bar-baz 123-222 \u0000", out: " Test foo_bar-baz 123-222 "},
		{in: "\u0000", out: ""},
		{in: "\u0000 ", out: " "},
		{in: "\u0000 \u0000", out: " "},
		{in: "\u00A0\t\n\r\u034F", out: ""},
		{in: "τυχαίο;", out: "τυχαίο;"},
	}

	// Table-driven expectations.
	for i := range cases {
		c := cases[i]
		got := Unicode(c.in)
		require.Equal(t, c.out, got, fmt.Sprintf("%#v -> %#v", c.in, c.out))
	}

	// Every listed invisible rune must vanish when sanitized in isolation.
	for _, invisible := range invisibleRunes {
		got := Unicode(string(invisible))
		require.Empty(t, got, fmt.Sprintf("%U", invisible))
	}
}
22 changes: 22 additions & 0 deletions stringify/stringify.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package stringify

import (
"encoding/json"
"fmt"
)

// Data converts data to its string form.
//
//   - A nil interface value yields the empty string.
//   - A string is returned unchanged.
//   - Any other value is encoded with json.Marshal; if encoding fails, the
//     result of fmt.Sprint on the value is returned instead.
func Data(data any) string {
	switch v := data.(type) {
	case nil:
		return ""
	case string:
		return v
	}

	encoded, err := json.Marshal(data)
	if err != nil {
		// Not JSON-encodable: fall back to Go's default formatting.
		return fmt.Sprint(data)
	}
	return string(encoded)
}
62 changes: 62 additions & 0 deletions stringify/stringify_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package stringify_test

import (
"errors"
"testing"

"github.com/stretchr/testify/require"

"github.com/rudderlabs/rudder-go-kit/stringify"
)

// failOnJSONMarshal is a test fixture whose JSON encoding always fails.
type failOnJSONMarshal struct{}

// MarshalJSON unconditionally reports an error and returns no bytes.
func (failOnJSONMarshal) MarshalJSON() ([]byte, error) {
	marshalErr := errors.New("failed to marshal")
	return nil, marshalErr
}

// TestStringyData runs stringify.Data over a table of inputs, one subtest per
// row, and compares each result with the expected string.
func TestStringyData(t *testing.T) {
	type row struct {
		name     string
		input    any
		expected string
	}

	rows := []row{
		{name: "Nil input", input: nil, expected: ""},
		{name: "String input", input: "test string", expected: "test string"},
		{
			name: "Struct input",
			input: struct {
				Name string `json:"name"`
				Age  int    `json:"age"`
			}{Name: "John", Age: 30},
			expected: `{"name":"John","age":30}`,
		},
		{
			name:     "Slice input",
			input:    []string{"apple", "banana", "cherry"},
			expected: `["apple","banana","cherry"]`,
		},
		// MarshalJSON fails for this input, so the expected value is the
		// fmt.Sprint rendering of an empty struct.
		{name: "Fail on JSON marshal", input: failOnJSONMarshal{}, expected: "{}"},
	}

	for _, r := range rows {
		t.Run(r.name, func(t *testing.T) {
			got := stringify.Data(r.input)
			require.Equal(t, r.expected, got)
		})
	}
}
30 changes: 30 additions & 0 deletions uuid/uuid.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package uuid

import (
"crypto/md5"

"github.com/google/uuid"
)

// GetMD5UUID hashes the given string with MD5 and returns the digest as a
// UUID whose version field is set to 4 and whose variant field is set to
// RFC 4122.
//
// MD5 is used to maintain backward compatibility. The function mimics the
// following github.com/gofrs/uuid code:
//
//	md5Sum := md5.Sum([]byte(str))
//	u, err := uuid.FromBytes(md5Sum[:])
//	u.SetVersion(uuid.V4)
//	u.SetVariant(uuid.VariantRFC4122)
//
// google/uuid does not allow the version and variant to be modified, so the
// bits are set by hand on the digest, following the gofrs/uuid implementation.
func GetMD5UUID(str string) (uuid.UUID, error) {
	const (
		versionIdx = 6       // byte holding the version in its high nibble
		variantIdx = 8       // byte holding the variant in its top bits
		version4   = byte(4) // version number written into the UUID
	)

	digest := md5.Sum([]byte(str)) // skipcq: GO-S1023

	// SetVersion: keep the low nibble, write 4 into the high nibble.
	digest[versionIdx] = digest[versionIdx]&0x0f | version4<<4
	// SetVariant (RFC 4122): keep the low six bits, force the top two to 10.
	digest[variantIdx] = digest[variantIdx]&0x3f | 0x80

	return uuid.FromBytes(digest[:])
}
Loading

0 comments on commit 7221a37

Please sign in to comment.