Skip to content

Commit

Permalink
fix: add envs and reduce memory
Browse files Browse the repository at this point in the history
  • Loading branch information
liuq19 committed Jul 3, 2024
1 parent 8bddb5a commit 5da790f
Show file tree
Hide file tree
Showing 31 changed files with 34,110 additions and 34,206 deletions.
1 change: 1 addition & 0 deletions external_jsonlib_test/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ require (
)

require (
github.com/bytedance/gopkg v0.0.0-20240531030433-5df24c0168e2 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
Expand Down
9 changes: 9 additions & 0 deletions external_jsonlib_test/go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/bytedance/gopkg v0.0.0-20240531030433-5df24c0168e2 h1:e+WTWDw35RetW+Zuhcy4YPFmNPRh7yPG7lAaWJeUQl0=
github.com/bytedance/gopkg v0.0.0-20240531030433-5df24c0168e2/go.mod h1:FtQG3YbQG9L/91pbKSw787yBQPutC+457AvDW77fgUQ=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
Expand Down Expand Up @@ -45,6 +47,13 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/net v0.0.0-20221014081412-f15817d10f9b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Expand Down
25 changes: 0 additions & 25 deletions fuzz/compat_amd64_test.go

This file was deleted.

25 changes: 0 additions & 25 deletions fuzz/compat_other.go

This file was deleted.

2 changes: 1 addition & 1 deletion fuzz/other_fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func fuzzHtmlEscape(t *testing.T, data []byte){
func fuzzStream(t *testing.T, data []byte) {
r := bytes.NewBuffer(data)
dc := decoder.NewStreamDecoder(r)
decoderEnableValidateString(dc)
dc.ValidateString()
r1 := bytes.NewBuffer(data)
dc1 := decoder.NewStreamDecoder(r1)

Expand Down
30 changes: 21 additions & 9 deletions internal/decoder/api/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,16 @@
package api

import (
`reflect`

`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/decoder/consts`
`github.com/bytedance/sonic/internal/decoder/errors`
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/option`
"reflect"

"github.com/bytedance/sonic/internal/decoder/consts"
"github.com/bytedance/sonic/internal/decoder/errors"
"github.com/bytedance/sonic/internal/decoder/optdec"
"github.com/bytedance/sonic/internal/envs"
"github.com/bytedance/sonic/internal/native"
"github.com/bytedance/sonic/internal/native/types"
"github.com/bytedance/sonic/internal/rt"
"github.com/bytedance/sonic/option"
)

const (
Expand Down Expand Up @@ -64,11 +66,21 @@ type Decoder struct {
i int
f uint64
s string
owned bool
}

var padding string = "x\"x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

// NewDecoder creates a new decoder instance for the given JSON text.
//
// When the optimized decoder is enabled (envs.UseOptDec), the input is
// cloned into a freshly allocated buffer with trailing padding so the
// native parser may safely read past the end of the data; `owned`
// records that the decoder holds its own private copy.
func NewDecoder(s string) *Decoder {
	var owned = false
	/* clone into buffer and padding it before unmarshal */
	if envs.UseOptDec {
		s = optdec.PaddingJson(s)
		owned = true
	}

	return &Decoder{s: s, owned: owned}
}

// Pos returns the current decoding position.
Expand Down
13 changes: 0 additions & 13 deletions internal/decoder/api/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,16 @@ func NewStreamDecoder(r io.Reader) *StreamDecoder {
func (self *StreamDecoder) Decode(val interface{}) (err error) {
// read more data into buf
if self.More() {
// println(string(self.buf))
var s = self.scanp
try_skip:
var e = len(self.buf)
// println("s:", s, "e:", e, "scanned:",self.scanned, "scanp:",self.scanp, self.buf)
var src = rt.Mem2Str(self.buf[s:e])
// if len(src) > 5 {
// println(src[:5], src[len(src)-5:])
// } else {
// println(src)
// }
// try skip
var x = 0;
if y := native.SkipOneFast(&src, &x); y < 0 {
if self.readMore() {
// println("more")
goto try_skip
} else {
// println("no more")
err = SyntaxError{e, self.s, types.ParsingError(-s), ""}
self.setErr(err)
return
Expand All @@ -89,7 +80,6 @@ func (self *StreamDecoder) Decode(val interface{}) (err error) {
e = x + s
}

// println("decode: ", s, e)
// must copy string here for safety
self.Decoder.Reset(string(self.buf[s:e]))
err = self.Decoder.Decode(val)
Expand All @@ -101,13 +91,11 @@ func (self *StreamDecoder) Decode(val interface{}) (err error) {
self.scanp = e
_, empty := self.scan()
if empty {
// println("recycle")
// no remain valid bytes, thus we just recycle buffer
mem := self.buf
self.buf = nil
bufPool.Put(mem[:0])
} else {
// println("keep")
// remain undecoded bytes, move them onto head
n := copy(self.buf, self.buf[self.scanp:])
self.buf = self.buf[:n]
Expand All @@ -123,7 +111,6 @@ func (self *StreamDecoder) Decode(val interface{}) (err error) {
// InputOffset returns the input stream byte offset of the current decoder position.
// The offset gives the location of the end of the most recently returned token and the beginning of the next token.
func (self *StreamDecoder) InputOffset() int64 {
	// scanned counts bytes already recycled out of buf; scanp is the
	// cursor within the current buffer.
	return self.scanned + int64(self.scanp)
}

Expand Down
1 change: 1 addition & 0 deletions internal/decoder/optdec/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func Decode(s *string, i *int, f uint64, val interface{}) error {
if err != nil {
return err
}


/* parse into document */
ctx, err := NewContext(*s, *i, uint64(f), etp)
Expand Down
115 changes: 68 additions & 47 deletions internal/decoder/optdec/native.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
)


const _PADDING_SIZE = 64

type ErrorCode int

const (
Expand Down Expand Up @@ -85,10 +87,8 @@ func (self *nodeBuf) init(nodes []node) {
// should consitent with native/parser.c
type Parser struct {
Json string
padded []byte
nodes []node
dbuf []byte
backup []node

options uint64
// JSON cursor
Expand Down Expand Up @@ -116,105 +116,126 @@ type jsonStat struct {


var (
defaultJsonPaddedCap uintptr = 1 << 20 // 1 Mb
defaultNodesCap uintptr = (1 << 20) / unsafe.Sizeof(node{}) // 1 Mb
)

var parsePool sync.Pool = sync.Pool {
New: func () interface{} {
return &Parser{
options: 0,
padded: make([]byte, 0, defaultJsonPaddedCap),
nodes: make([]node, defaultNodesCap, defaultNodesCap),
dbuf: make([]byte, types.MaxDigitNums, types.MaxDigitNums),
}
},
}

var padding string = "x\"x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
var _PADDING_STR string = "x\"x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

// PaddingJson returns a copy of s backed by a buffer that is
// _PADDING_SIZE bytes longer than s, with the trailing bytes filled
// from _PADDING_STR. The returned string header reports only len(s),
// so the padding is invisible to callers while still being readable
// past the end of the data by the native parser.
func PaddingJson(s string) string {
	// allocate len(s)+_PADDING_SIZE bytes as one block
	bytes := rt.Mallocgc(uintptr(len(s) + _PADDING_SIZE), rt.BytesType, false)
	// copy the payload first, then the padding bytes right after it
	rt.Memmove(bytes, ((*rt.GoString)(unsafe.Pointer(&s))).Ptr, uintptr(len(s)))
	rt.Memmove(unsafe.Pointer(uintptr(bytes) + uintptr(len(s))), ((*rt.GoString)(unsafe.Pointer(&_PADDING_STR))).Ptr, _PADDING_SIZE)
	// not include padding buffer in string
	owned := rt.GoString{Ptr: bytes, Len: len(s)}
	return *(*string)(unsafe.Pointer(&owned))
}

func newParser(data string, pos int, opt uint64) *Parser {
func newParser(owned string, pos int, opt uint64) *Parser {
p := parsePool.Get().(*Parser)

/* validate json if needed */
if (opt & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(data){
dbuf := utf8.CorrectWith(nil, rt.Str2Mem(data[pos:]), "\ufffd")
dbuf = append(dbuf, padding...)
p.Json = rt.Mem2Str(dbuf[:len(dbuf) - len(padding)])
if (opt & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(owned){
dbuf := utf8.CorrectWith(nil, rt.Str2Mem(owned[pos:]), "\ufffd")
dbuf = append(dbuf, _PADDING_STR...)
p.Json = rt.Mem2Str(dbuf[:len(dbuf) - len(_PADDING_STR)])
p.Utf8Inv = true
p.start = uintptr((*rt.GoString)(unsafe.Pointer(&p.Json)).Ptr)
} else {
p.Json = data
// TODO: prevent too large JSON
p.padded = append(p.padded, data[pos:]...)
p.padded = append(p.padded, padding...)
p.start = uintptr((*rt.GoSlice)(unsafe.Pointer(&p.padded)).Ptr)
p.Json = owned
}

p.start = uintptr((*rt.GoString)(unsafe.Pointer(&p.Json)).Ptr)
p.cur = p.start
p.end = p.cur + uintptr(len(p.Json))
p.options = opt

if p.nodes == nil {
newn := rt.Mallocgc(uintptr(defaultNodesCap) * unsafe.Sizeof(node{}), rt.ByteType, false)
slice := rt.GoSlice {
Ptr: newn,
Len: int(defaultNodesCap),
Cap: int(defaultNodesCap),
}
p.nodes = *(*[]node)(unsafe.Pointer(&slice))
}
p.nbuf.init(p.nodes)
return p
}


// Pos reports the parser's current byte offset from the start of the input.
func (p *Parser) Pos() int {
	offset := p.cur - p.start
	return int(offset)
}

func (p *Parser) JsonBytes() []byte {
if p.Utf8Inv {
return (rt.Str2Mem(p.Json))
} else {
return p.padded
return rt.Str2Mem(p.Json)
}

// growNodeBuf enlarges the parser's node buffer after the native parser
// runs out of node space, preserving the nodes written so far and
// repointing the nbuf cursors into the new storage so parsing can resume.
func (p *Parser) growNodeBuf() {
	// calculate new capacity: double the used length, but at least
	// len(Json)/32 so very large inputs grow in one step
	offset := p.nbuf.ncur - p.nbuf.nstart
	oldLen := int(offset / unsafe.Sizeof(node{}))
	newCap := oldLen * 2
	if newCap <= len(p.Json) / 32 {
		newCap = len(p.Json) / 32
	}

	// grow node buffer and move the already-built nodes across
	newn := rt.Mallocgc(uintptr(newCap) * unsafe.Sizeof(node{}), rt.ByteType, false)
	rt.Memmove(newn, unsafe.Pointer(p.nbuf.nstart), offset)
	slice := rt.GoSlice {
		Ptr: newn,
		Len: newCap,
		Cap: newCap,
	}
	p.nodes = *(*[]node)(unsafe.Pointer(&slice))

	// update cursor: keep the same byte offset within the new buffer
	p.nbuf.ncur = uintptr(newn) + offset
	p.nbuf.nstart = uintptr(newn)
	p.nbuf.nend = uintptr(newn) + uintptr(newCap) * unsafe.Sizeof(node{})

}

func (p *Parser) parse() ErrorCode {
var offset uintptr
var err ErrorCode

// when decode into struct, we should decode number as possible
old := p.options
if !p.isEface {
p.options &^= 1 << _F_use_number
}

// fast path with limited node buffer
err := ErrorCode(native.ParseWithPadding(unsafe.Pointer(p)))
if err != SONIC_VISIT_FAILED {
goto ret;
for {
// fast path with limited node buffer
err = ErrorCode(native.ParseWithPadding(unsafe.Pointer(p)))
if err != SONIC_VISIT_FAILED {
p.options = old
return err
}
p.growNodeBuf()
}

// fallback parse
// maybe node buf is not enough, continue
p.backup = p.nodes
p.nodes = make([]node, len(p.Json) / 2 + 2, len(p.Json) / 2 + 2)
copy(p.nodes, p.backup)
offset = (p.nbuf.ncur - p.nbuf.nstart) / unsafe.Sizeof(node{})
p.nbuf.nstart = uintptr(unsafe.Pointer(&p.nodes[0]))
p.nbuf.nend = p.nbuf.nstart + uintptr(cap(p.nodes)) * unsafe.Sizeof(node{})
p.nbuf.ncur = uintptr(unsafe.Pointer(&p.nodes[offset]))
err = ErrorCode(native.ParseWithPadding(unsafe.Pointer(p)))

ret:
p.options = old
return err
}

// reset returns the parser to its zero decoding state before it is put
// back into the pool. Node buffers that grew beyond the default
// capacity are dropped so pooled parsers do not pin large allocations.
func (p *Parser) reset() {
	p.options = 0
	p.start = 0
	p.cur = 0
	p.end = 0
	p.Json = ""
	p.nbuf = nodeBuf{}
	p._nbk = _nospaceBlock{}
	// release oversized node buffers; newParser reallocates on demand
	if len(p.nodes) > int(defaultNodesCap) {
		p.nodes = nil
	}
	p.Utf8Inv = false
	p.isEface = false
}
Expand Down
Loading

0 comments on commit 5da790f

Please sign in to comment.