From 2d2555f0b411e2a195b0295015059fd76bc333e5 Mon Sep 17 00:00:00 2001 From: ucwong Date: Mon, 25 Sep 2023 22:27:00 +0800 Subject: [PATCH] deps --- go.mod | 8 +- go.sum | 16 +- .../CortexFoundation/robot/backend/common.go | 5 + .../CortexFoundation/robot/backend/torrent.go | 4 +- .../CortexFoundation/robot/config.go | 8 + .../CortexFoundation/robot/model_srv.go | 7 +- .../CortexFoundation/robot/monitor.go | 12 +- .../zeebo/xxh3/accum_vector_avx512_amd64.s | 296 +++--- .../zeebo/xxh3/accum_vector_avx_amd64.s | 913 +++++++++--------- vendor/modules.txt | 8 +- 10 files changed, 647 insertions(+), 630 deletions(-) diff --git a/go.mod b/go.mod index 4fba1017e7..358799d1f4 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v0.4.1 github.com/CortexFoundation/inference v1.0.2-0.20230307032835-9197d586a4e8 github.com/CortexFoundation/statik v0.0.0-20210315012922-8bb8a7b5dc66 - github.com/CortexFoundation/torrentfs v1.0.55-0.20230924132644-328dc7f8815c + github.com/CortexFoundation/torrentfs v1.0.55-0.20230925132950-bf8f925efaea github.com/VictoriaMetrics/fastcache v1.12.1 github.com/arsham/figurine v1.3.0 github.com/aws/aws-sdk-go-v2 v1.21.0 @@ -82,7 +82,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/internal v1.0.0 // indirect github.com/CortexFoundation/cvm-runtime v0.0.0-20221117094012-b5a251885572 // indirect github.com/CortexFoundation/merkletree v0.0.0-20230724124840-b6e80265a137 // indirect - github.com/CortexFoundation/robot v1.0.7-0.20230924130929-01afc8203c14 // indirect + github.com/CortexFoundation/robot v1.0.7-0.20230924205950-05c3925242ed // indirect github.com/CortexFoundation/wormhole v0.0.2-0.20230922082251-f97b53242e48 // indirect github.com/DataDog/zstd v1.5.6-0.20230622172052-ea68dcab66c0 // indirect github.com/RoaringBitmap/roaring v1.3.0 // indirect @@ -219,12 +219,12 @@ require ( github.com/ucwong/filecache v1.0.6-0.20230405163841-810d53ced4bd // indirect github.com/ucwong/go-ttlmap v1.0.2-0.20221020173635-331e7ddde2bb // indirect github.com/ucwong/golang-kv v1.0.23-0.20230922195406-1f1883da3532 // indirect - github.com/ucwong/shard v1.0.1-0.20230902205521-676c0c9c1dd2 // indirect + github.com/ucwong/shard v1.0.1-0.20230924231639-2ac2d8ab288c // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect github.com/xujiajun/mmap-go v1.0.1 // indirect github.com/xujiajun/utils v0.0.0-20220904132955-5f7c5b914235 // indirect github.com/yusufpapurcu/wmi v1.2.3 // indirect - github.com/zeebo/xxh3 v1.0.3-0.20230105190837-8b1e819c6d4d // indirect + github.com/zeebo/xxh3 v1.0.3-0.20230502181907-3808c706a06a // indirect go.etcd.io/bbolt v1.3.7 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/otel v1.16.0 // indirect diff --git a/go.sum b/go.sum index b1aa196846..7f01c0b2f2 100644 --- a/go.sum +++ b/go.sum @@ -60,15 +60,15 @@ github.com/CortexFoundation/inference v1.0.2-0.20230307032835-9197d586a4e8 h1:W/ github.com/CortexFoundation/inference v1.0.2-0.20230307032835-9197d586a4e8/go.mod h1:Doj3mBNzdjCDvKVwysKaHEPbS20A7RRaQY0bHtEVz88= github.com/CortexFoundation/merkletree v0.0.0-20230724124840-b6e80265a137 h1:GdrLwJRKPrUSd4V/cBpPyzhNQEo8IDT7Le15hmuC+/4= github.com/CortexFoundation/merkletree v0.0.0-20230724124840-b6e80265a137/go.mod h1:OwfhC316GcEJ9QVNWPqj6QV7sorcXBnc0P9p8dPfSbM= -github.com/CortexFoundation/robot v1.0.7-0.20230924130929-01afc8203c14 h1:ayq7w6SeAN122hJf7GeFKss2LzyBg8NdaoxGm2mO1dA= -github.com/CortexFoundation/robot v1.0.7-0.20230924130929-01afc8203c14/go.mod h1:8y8iDdeeXuTGBxQihkt+5KnwL6OtCLeA7HGpHm5+S1w= +github.com/CortexFoundation/robot v1.0.7-0.20230924205950-05c3925242ed h1:LuwYo3FNfIvy0MDfrQRkW31MTtDz80wikKhaw/I7NXA= +github.com/CortexFoundation/robot v1.0.7-0.20230924205950-05c3925242ed/go.mod h1:8y8iDdeeXuTGBxQihkt+5KnwL6OtCLeA7HGpHm5+S1w= github.com/CortexFoundation/statik v0.0.0-20210315012922-8bb8a7b5dc66 h1:yJbN4DFvpStCShXOVxNV64aawsPqizLuXZhrnhCr2fY= github.com/CortexFoundation/statik v0.0.0-20210315012922-8bb8a7b5dc66/go.mod h1:AkjV4OECAskB9m6w+2e84F0Zcx7oZWEmHB3EKoaDXYk= github.com/CortexFoundation/torrentfs v1.0.13-0.20200623060705-ce027f43f2f8/go.mod h1:Ma+tGhPPvz4CEZHaqEJQMOEGOfHeQBiAoNd1zyc/w3Q= github.com/CortexFoundation/torrentfs v1.0.14-0.20200703071639-3fcabcabf274/go.mod h1:qnb3YlIJmuetVBtC6Lsejr0Xru+1DNmDCdTqnwy7lhk= github.com/CortexFoundation/torrentfs v1.0.20-0.20200810031954-d36d26f82fcc/go.mod h1:N5BsicP5ynjXIi/Npl/SRzlJ630n1PJV2sRj0Z0t2HA= -github.com/CortexFoundation/torrentfs v1.0.55-0.20230924132644-328dc7f8815c h1:eED3XfmiW4K0sNeqMq8wwDMyYPYDctdYueJ4D8WBKY8= -github.com/CortexFoundation/torrentfs v1.0.55-0.20230924132644-328dc7f8815c/go.mod h1:NCZqhUSZt+dwViKv3mzDw5MowvBzbTE/0kYqXSzFQdE= +github.com/CortexFoundation/torrentfs v1.0.55-0.20230925132950-bf8f925efaea h1:2mOtJfaSmXlPsl6ehyHN28ErOk54eoCaDaxg4BJZmnU= +github.com/CortexFoundation/torrentfs v1.0.55-0.20230925132950-bf8f925efaea/go.mod h1:eGj6kGpTRO5AbYo2z2FkGlXSL1CcK7fG5TIGaXMBGNY= github.com/CortexFoundation/wormhole v0.0.2-0.20230922082251-f97b53242e48 h1:EDrk6U+GjSJ1FdbTrtRDe3LA/Ot6E3xu/HpXAio99B4= github.com/CortexFoundation/wormhole v0.0.2-0.20230922082251-f97b53242e48/go.mod h1:a2ynt5IqAlGTWLQY0pILqkxYe4AzHLNd+bPmK/r03oE= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= @@ -1258,8 +1258,8 @@ github.com/ucwong/golang-set v1.8.1-0.20200419153428-d7b0b1ac2d43/go.mod h1:xu0F github.com/ucwong/goleveldb v1.0.3-0.20200508074755-578cba616f37/go.mod h1:dgJUTtDxq/ne6/JzZhHzF24OL/uqILz9IWk8HmT4V2g= github.com/ucwong/goleveldb v1.0.3-0.20200618184106-f1c6bc3a428b/go.mod h1:7Sq6w7AfEZuB/a6mrlvHCSXCSkqojCMMrM3Ei12QAT0= github.com/ucwong/goleveldb v1.0.3-0.20200807021306-7d6f914f4fab/go.mod h1:7Sq6w7AfEZuB/a6mrlvHCSXCSkqojCMMrM3Ei12QAT0= -github.com/ucwong/shard v1.0.1-0.20230902205521-676c0c9c1dd2 h1:aLhuaWVl4L+mhsYO52QflDb3OukXRy/aAg6qP2MuVRM= -github.com/ucwong/shard v1.0.1-0.20230902205521-676c0c9c1dd2/go.mod h1:pZgA0970EQMQuCyAjCs3Q+9ne2QqnBBiirrTyw+zYiE= +github.com/ucwong/shard v1.0.1-0.20230924231639-2ac2d8ab288c h1:6amFZXt8Md3xghS4RfqbYYHhURr0wlnS1e5YDW20Kqs= +github.com/ucwong/shard v1.0.1-0.20230924231639-2ac2d8ab288c/go.mod h1:zt2/4NyVHn3xGgm8ybwqlgOn5ApkB5eR1UziTpnLmVM= github.com/ucwong/tsdb v0.10.4-0.20200505032819-8f9eeea5c692/go.mod h1:dw8qQ+CN0e7UFHI94vvExZ2UJg10jeFhP16PeXPtvsU= github.com/ucwong/tsdb v0.10.4-0.20200518132041-df9cb51f3a80/go.mod h1:7uwwGAwwZp039dLF4SLwyB691OcSipsMo5D0ZDlRU5M= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= @@ -1301,8 +1301,8 @@ github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFi github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= -github.com/zeebo/xxh3 v1.0.3-0.20230105190837-8b1e819c6d4d h1:T9HDihmAbNxk9P9QfaMUSvKKdPM8ynezMHK8pMDOCzo= -github.com/zeebo/xxh3 v1.0.3-0.20230105190837-8b1e819c6d4d/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +github.com/zeebo/xxh3 v1.0.3-0.20230502181907-3808c706a06a h1:wNpehQr1+k+Z6H5RvqnCPVH9Dg/d1/ZbWucPQ+FLva4= +github.com/zeebo/xxh3 v1.0.3-0.20230502181907-3808c706a06a/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.4/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= diff --git a/vendor/github.com/CortexFoundation/robot/backend/common.go b/vendor/github.com/CortexFoundation/robot/backend/common.go index 8f4e0fed93..2b55735ca3 100644 --- a/vendor/github.com/CortexFoundation/robot/backend/common.go +++ b/vendor/github.com/CortexFoundation/robot/backend/common.go @@ -18,6 +18,7 @@ package backend import ( "encoding/binary" + "strconv" ) func uint64ToBytes(i uint64) []byte { @@ -25,3 +26,7 @@ func uint64ToBytes(i uint64) []byte { binary.BigEndian.PutUint64(buf[:], i) return buf[:] } + +func uint64ToHex(i uint64) []byte { + return []byte(strconv.FormatUint(i, 16)) +} diff --git a/vendor/github.com/CortexFoundation/robot/backend/torrent.go b/vendor/github.com/CortexFoundation/robot/backend/torrent.go index 1aa15ad655..b8136f1d1c 100644 --- a/vendor/github.com/CortexFoundation/robot/backend/torrent.go +++ b/vendor/github.com/CortexFoundation/robot/backend/torrent.go @@ -47,14 +47,14 @@ func (fs *ChainDB) SetTorrentProgress(ih string, size uint64) (bool, uint64, err v := buk.Get([]byte(ih)) if v == nil { - err = buk.Put([]byte(ih), []byte(strconv.FormatUint(size, 16))) + err = buk.Put([]byte(ih), uint64ToHex(size)) } else { s, err := strconv.ParseUint(string(v), 16, 64) if err != nil { return err } if size > s { - err = buk.Put([]byte(ih), []byte(strconv.FormatUint(size, 16))) + err = buk.Put([]byte(ih), uint64ToHex(size)) } else { size = s } diff --git a/vendor/github.com/CortexFoundation/robot/config.go b/vendor/github.com/CortexFoundation/robot/config.go index 2c0b443480..912cf50312 100644 --- a/vendor/github.com/CortexFoundation/robot/config.go +++ b/vendor/github.com/CortexFoundation/robot/config.go @@ -17,6 +17,7 @@ package robot import ( + "github.com/CortexFoundation/CortexTheseus/metrics" "time" ) @@ -25,3 +26,10 @@ const ( delay = 12 //params.Delay timeout = 30 * time.Second ) + +var ( + rpcBlockMeter = metrics.NewRegisteredMeter("torrent/block/call", nil) + rpcCurrentMeter = metrics.NewRegisteredMeter("torrent/current/call", nil) + rpcUploadMeter = metrics.NewRegisteredMeter("torrent/upload/call", nil) + rpcReceiptMeter = metrics.NewRegisteredMeter("torrent/receipt/call", nil) +) diff --git a/vendor/github.com/CortexFoundation/robot/model_srv.go b/vendor/github.com/CortexFoundation/robot/model_srv.go index 7a072f1760..f9336d28b5 100644 --- a/vendor/github.com/CortexFoundation/robot/model_srv.go +++ b/vendor/github.com/CortexFoundation/robot/model_srv.go @@ -79,6 +79,7 @@ func (m *Monitor) parseBlockTorrentInfo(b *types.Block) (bool, error) { start = mclock.Now() final []types.Transaction ) + for _, tx := range b.Txs { if meta := tx.Parse(); meta != nil { log.Debug("Data encounter", "ih", meta.InfoHash, "number", b.Number, "meta", meta) @@ -112,7 +113,7 @@ func (m *Monitor) parseBlockTorrentInfo(b *types.Block) (bool, error) { file.LeftSize = remainingSize if _, progress, err := m.fs.AddFile(file); err != nil { return false, err - } else if progress { // && progress { + } else if progress { log.Debug("Update storage success", "ih", file.Meta.InfoHash, "left", file.LeftSize) var bytesRequested uint64 if file.Meta.RawSize > file.LeftSize { @@ -134,10 +135,12 @@ func (m *Monitor) parseBlockTorrentInfo(b *types.Block) (bool, error) { final = append(final, tx) } } + if len(final) > 0 && len(final) < len(b.Txs) { log.Debug("Final txs layout", "total", len(b.Txs), "final", len(final), "num", b.Number, "txs", m.fs.Txs()) b.Txs = final } + if record { if err := m.fs.AddBlock(b); err == nil { log.Info("Root has been changed", "number", b.Number, "hash", b.Hash, "root", m.fs.Root()) @@ -145,9 +148,11 @@ func (m *Monitor) parseBlockTorrentInfo(b *types.Block) (bool, error) { log.Warn("Block added failed", "number", b.Number, "hash", b.Hash, "root", m.fs.Root(), "err", err) } } + if len(b.Txs) > 0 { elapsed := time.Duration(mclock.Now()) - time.Duration(start) log.Trace("Transactions scanning", "count", len(b.Txs), "number", b.Number, "elapsed", common.PrettyDuration(elapsed)) } + return record, nil } diff --git a/vendor/github.com/CortexFoundation/robot/monitor.go b/vendor/github.com/CortexFoundation/robot/monitor.go index 91b5c47564..084cfc265e 100644 --- a/vendor/github.com/CortexFoundation/robot/monitor.go +++ b/vendor/github.com/CortexFoundation/robot/monitor.go @@ -22,7 +22,6 @@ import ( "github.com/CortexFoundation/CortexTheseus/common" "github.com/CortexFoundation/CortexTheseus/common/mclock" "github.com/CortexFoundation/CortexTheseus/log" - "github.com/CortexFoundation/CortexTheseus/metrics" "github.com/CortexFoundation/CortexTheseus/rpc" "github.com/CortexFoundation/robot/backend" "github.com/CortexFoundation/torrentfs/params" @@ -38,13 +37,6 @@ import ( "time" ) -var ( - rpcBlockMeter = metrics.NewRegisteredMeter("torrent/block/call", nil) - rpcCurrentMeter = metrics.NewRegisteredMeter("torrent/current/call", nil) - rpcUploadMeter = metrics.NewRegisteredMeter("torrent/upload/call", nil) - rpcReceiptMeter = metrics.NewRegisteredMeter("torrent/receipt/call", nil) -) - // Monitor observes the data changes on the blockchain and synchronizes. // cl for ipc/rpc communication, dl for download manager, and fs for data storage. type Monitor struct { @@ -188,7 +180,7 @@ func (m *Monitor) Callback() chan any { return m.callback } -func (m *Monitor) loadHistory() error { +/*func (m *Monitor) loadHistory() error { torrents, _ := m.fs.InitTorrents() if m.mode != params.LAZY { for k, v := range torrents { @@ -206,7 +198,7 @@ func (m *Monitor) loadHistory() error { } return nil -} +}*/ func (m *Monitor) download(ctx context.Context, k string, v uint64) error { if m.mode != params.LAZY && m.callback != nil { diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s index cfaf9f0a77..c8ce38a62b 100644 --- a/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s +++ b/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s @@ -21,179 +21,168 @@ TEXT ·accumAVX512(SB), NOSPLIT, $0-32 MOVQ len+24(FP), BX VMOVDQU64 (AX), Z1 VMOVDQU64 prime_avx512<>+0(SB), Z0 - VMOVDQU64 (DX), Z2 - VMOVDQU64 8(DX), Z3 - VMOVDQU64 16(DX), Z4 - VMOVDQU64 24(DX), Z5 - VMOVDQU64 32(DX), Z6 - VMOVDQU64 40(DX), Z7 - VMOVDQU64 48(DX), Z8 - VMOVDQU64 56(DX), Z9 - VMOVDQU64 64(DX), Z10 - VMOVDQU64 72(DX), Z11 - VMOVDQU64 80(DX), Z12 - VMOVDQU64 88(DX), Z13 - VMOVDQU64 96(DX), Z14 - VMOVDQU64 104(DX), Z15 - VMOVDQU64 112(DX), Z16 - VMOVDQU64 120(DX), Z17 - VMOVDQU64 128(DX), Z18 - VMOVDQU64 121(DX), Z19 + VMOVDQU64 (DX), Z3 + VMOVDQU64 8(DX), Z4 + VMOVDQU64 16(DX), Z5 + VMOVDQU64 24(DX), Z6 + VMOVDQU64 32(DX), Z7 + VMOVDQU64 40(DX), Z8 + VMOVDQU64 48(DX), Z9 + VMOVDQU64 56(DX), Z10 + VMOVDQU64 64(DX), Z11 + VMOVDQU64 72(DX), Z12 + VMOVDQU64 80(DX), Z13 + VMOVDQU64 88(DX), Z14 + VMOVDQU64 96(DX), Z15 + VMOVDQU64 104(DX), Z16 + VMOVDQU64 112(DX), Z17 + VMOVDQU64 120(DX), Z18 + VMOVDQU64 128(DX), Z19 + VMOVDQU64 121(DX), Z20 accum_large: CMPQ BX, $0x00000400 JLE accum - VMOVDQU64 (CX), Z20 + VMOVDQU64 (CX), Z21 PREFETCHT0 1024(CX) - VPXORD Z2, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z3, Z21, Z2 + VPSHUFD $0x31, Z2, Z22 + VPMULUDQ Z2, Z22, Z2 + VPSHUFD $0x4e, Z21, Z21 VPADDQ Z1, Z21, Z1 - VMOVDQU64 64(CX), Z20 + VMOVDQU64 64(CX), Z21 PREFETCHT0 1088(CX) - VPXORD Z3, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z4, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 128(CX), Z20 + VMOVDQU64 128(CX), Z21 PREFETCHT0 1152(CX) - VPXORD Z4, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z5, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 192(CX), Z20 + VMOVDQU64 192(CX), Z21 PREFETCHT0 1216(CX) - VPXORD Z5, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z6, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 256(CX), Z20 + VMOVDQU64 256(CX), Z21 PREFETCHT0 1280(CX) - VPXORD Z6, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z7, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 320(CX), Z20 + VMOVDQU64 320(CX), Z21 PREFETCHT0 1344(CX) - VPXORD Z7, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z8, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 384(CX), Z20 + VMOVDQU64 384(CX), Z21 PREFETCHT0 1408(CX) - VPXORD Z8, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z9, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 448(CX), Z20 + VMOVDQU64 448(CX), Z21 PREFETCHT0 1472(CX) - VPXORD Z9, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z10, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 512(CX), Z20 + VMOVDQU64 512(CX), Z21 PREFETCHT0 1536(CX) - VPXORD Z10, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z11, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 576(CX), Z20 + VMOVDQU64 576(CX), Z21 PREFETCHT0 1600(CX) - VPXORD Z11, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z12, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 640(CX), Z20 + VMOVDQU64 640(CX), Z21 PREFETCHT0 1664(CX) - VPXORD Z12, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z13, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 704(CX), Z20 + VMOVDQU64 704(CX), Z21 PREFETCHT0 1728(CX) - VPXORD Z13, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z14, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 768(CX), Z20 + VMOVDQU64 768(CX), Z21 PREFETCHT0 1792(CX) - VPXORD Z14, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z15, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 832(CX), Z20 + VMOVDQU64 832(CX), Z21 PREFETCHT0 1856(CX) - VPXORD Z15, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z16, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 896(CX), Z20 + VMOVDQU64 896(CX), Z21 PREFETCHT0 1920(CX) - VPXORD Z16, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z17, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 - VMOVDQU64 960(CX), Z20 + VMOVDQU64 960(CX), Z21 PREFETCHT0 1984(CX) - VPXORD Z17, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPXORD Z18, Z21, Z22 + VPSHUFD $0x31, Z22, Z23 + VPMULUDQ Z22, Z23, Z22 + VPSHUFD $0x4e, Z21, Z21 + VPADDQ Z2, Z22, Z2 VPADDQ Z1, Z21, Z1 + VPADDQ Z1, Z2, Z1 ADDQ $0x00000400, CX SUBQ $0x00000400, BX - VPSRLQ $0x2f, Z1, Z20 - VPTERNLOGD $0x96, Z1, Z18, Z20 - VPMULUDQ Z0, Z20, Z1 - VPSHUFD $0xf5, Z20, Z20 - VPMULUDQ Z0, Z20, Z20 - VPSLLQ $0x20, Z20, Z20 - VPADDQ Z1, Z20, Z1 + VPSRLQ $0x2f, Z1, Z2 + VPTERNLOGD $0x96, Z1, Z19, Z2 + VPMULUDQ Z0, Z2, Z1 + VPSHUFD $0xf5, Z2, Z2 + VPMULUDQ Z0, Z2, Z2 + VPSLLQ $0x20, Z2, Z2 + VPADDQ Z1, Z2, Z1 JMP accum_large accum: - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z2, Z0, Z2 - VPSHUFD $0x31, Z2, Z18 - VPMULUDQ Z2, Z18, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX CMPQ BX, $0x40 JLE finalize VMOVDQU64 (CX), Z0 @@ -201,8 +190,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -212,8 +201,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -223,8 +212,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -234,8 +223,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -245,8 +234,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -256,8 +245,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -267,8 +256,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -278,8 +267,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -289,8 +278,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -300,8 +289,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -311,8 +300,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -322,8 +311,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -333,8 +322,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -344,8 +333,8 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX CMPQ BX, $0x40 @@ -355,8 +344,19 @@ accum: VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z2, Z1 VPADDQ Z1, Z0, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z18, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 ADDQ $0x00000040, CX SUBQ $0x00000040, BX @@ -366,12 +366,12 @@ finalize: SUBQ $0x40, CX ADDQ BX, CX VMOVDQU64 (CX), Z0 - VPXORD Z19, Z0, Z2 + VPXORD Z20, Z0, Z2 VPSHUFD $0x31, Z2, Z3 VPMULUDQ Z2, Z3, Z2 VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 VPADDQ Z1, Z2, Z1 + VPADDQ Z1, Z0, Z1 return: VMOVDQU64 Z1, (AX) diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s index b53c1521f7..52cc003201 100644 --- a/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s +++ b/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s @@ -19,250 +19,255 @@ TEXT ·accumAVX2(SB), NOSPLIT, $0-32 VMOVDQU (AX), Y1 VMOVDQU 32(AX), Y2 VMOVDQU prime_avx<>+0(SB), Y0 + CMPQ SI, $0x00000400 + JLE accum + VMOVDQU 32(DX), Y5 + VMOVDQU 40(DX), Y6 + VMOVDQU 48(DX), Y7 + VMOVDQU 56(DX), Y8 + VMOVDQU 64(DX), Y9 accum_large: - CMPQ SI, $0x00000400 - JLE accum VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y6 - PREFETCHT0 512(CX) - VPXOR (DX), Y3, Y4 - VPXOR 32(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y6 - PREFETCHT0 576(CX) - VPXOR 8(DX), Y3, Y4 - VPXOR 40(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y6 - PREFETCHT0 640(CX) - VPXOR 16(DX), Y3, Y4 - VPXOR 48(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y6 - PREFETCHT0 704(CX) - VPXOR 24(DX), Y3, Y4 - VPXOR 56(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y6 - PREFETCHT0 768(CX) - VPXOR 32(DX), Y3, Y4 - VPXOR 64(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y6 - PREFETCHT0 832(CX) - VPXOR 40(DX), Y3, Y4 - VPXOR 72(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y6 - PREFETCHT0 896(CX) - VPXOR 48(DX), Y3, Y4 - VPXOR 80(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y6 - PREFETCHT0 960(CX) - VPXOR 56(DX), Y3, Y4 - VPXOR 88(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y6 + VMOVDQU 32(CX), Y4 PREFETCHT0 1024(CX) - VPXOR 64(DX), Y3, Y4 - VPXOR 96(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 + VPXOR (DX), Y3, Y10 + VPXOR Y5, Y4, Y12 + VPSHUFD $0x31, Y10, Y11 + VPSHUFD $0x31, Y12, Y13 + VPMULUDQ Y10, Y11, Y10 + VPMULUDQ Y12, Y13, Y12 VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y6 + VPSHUFD $0x4e, Y4, Y4 + VPADDQ Y1, Y10, Y1 + VPADDQ Y2, Y12, Y2 + VMOVDQU 64(CX), Y10 + VMOVDQU 96(CX), Y13 PREFETCHT0 1088(CX) - VPXOR 72(DX), Y3, Y4 - VPXOR 104(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y6 + VPXOR 8(DX), Y10, Y11 + VPXOR Y6, Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 128(CX), Y10 + VMOVDQU 160(CX), Y13 PREFETCHT0 1152(CX) - VPXOR 80(DX), Y3, Y4 - VPXOR 112(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y6 + VPXOR 16(DX), Y10, Y11 + VPXOR Y7, Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 192(CX), Y10 + VMOVDQU 224(CX), Y13 PREFETCHT0 1216(CX) - VPXOR 88(DX), Y3, Y4 - VPXOR 120(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y6 + VPXOR 24(DX), Y10, Y11 + VPXOR Y8, Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 256(CX), Y10 + VMOVDQU 288(CX), Y13 PREFETCHT0 1280(CX) - VPXOR 96(DX), Y3, Y4 - VPXOR 128(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y6 + VPXOR Y5, Y10, Y11 + VPXOR Y9, Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 320(CX), Y10 + VMOVDQU 352(CX), Y13 PREFETCHT0 1344(CX) - VPXOR 104(DX), Y3, Y4 - VPXOR 136(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y6 + VPXOR Y6, Y10, Y11 + VPXOR 72(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 384(CX), Y10 + VMOVDQU 416(CX), Y13 PREFETCHT0 1408(CX) - VPXOR 112(DX), Y3, Y4 - VPXOR 144(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y6 + VPXOR Y7, Y10, Y11 + VPXOR 80(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 448(CX), Y10 + VMOVDQU 480(CX), Y13 PREFETCHT0 1472(CX) - VPXOR 120(DX), Y3, Y4 - VPXOR 152(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 + VPXOR Y8, Y10, Y11 + VPXOR 88(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 512(CX), Y10 + VMOVDQU 544(CX), Y13 + PREFETCHT0 1536(CX) + VPXOR Y9, Y10, Y11 + VPXOR 96(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 576(CX), Y10 + VMOVDQU 608(CX), Y13 + PREFETCHT0 1600(CX) + VPXOR 72(DX), Y10, Y11 + VPXOR 104(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 640(CX), Y10 + VMOVDQU 672(CX), Y13 + PREFETCHT0 1664(CX) + VPXOR 80(DX), Y10, Y11 + VPXOR 112(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 704(CX), Y10 + VMOVDQU 736(CX), Y13 + PREFETCHT0 1728(CX) + VPXOR 88(DX), Y10, Y11 + VPXOR 120(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 768(CX), Y10 + VMOVDQU 800(CX), Y13 + PREFETCHT0 1792(CX) + VPXOR 96(DX), Y10, Y11 + VPXOR 128(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 832(CX), Y10 + VMOVDQU 864(CX), Y13 + PREFETCHT0 1856(CX) + VPXOR 104(DX), Y10, Y11 + VPXOR 136(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 896(CX), Y10 + VMOVDQU 928(CX), Y13 + PREFETCHT0 1920(CX) + VPXOR 112(DX), Y10, Y11 + VPXOR 144(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 + VMOVDQU 960(CX), Y10 + VMOVDQU 992(CX), Y13 + PREFETCHT0 1984(CX) + VPXOR 120(DX), Y10, Y11 + VPXOR 152(DX), Y13, Y14 + VPSHUFD $0x31, Y11, Y12 + VPSHUFD $0x31, Y14, Y15 + VPMULUDQ Y11, Y12, Y11 + VPMULUDQ Y14, Y15, Y14 + VPSHUFD $0x4e, Y10, Y10 + VPSHUFD $0x4e, Y13, Y13 + VPADDQ Y1, Y11, Y1 + VPADDQ Y2, Y14, Y2 + VPADDQ Y3, Y10, Y3 + VPADDQ Y4, Y13, Y4 VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 + VPADDQ Y2, Y4, Y2 ADDQ $0x00000400, CX SUBQ $0x00000400, SI VPSRLQ $0x2f, Y1, Y3 @@ -281,6 +286,8 @@ accum_large: VPMULUDQ Y0, Y3, Y3 VPSLLQ $0x20, Y3, Y3 VPADDQ Y2, Y3, Y2 + CMPQ SI, $0x00000400 + JLE accum JMP accum_large accum: @@ -296,10 +303,10 @@ accum: VPMULUDQ Y6, Y7, Y6 VPSHUFD $0x4e, Y0, Y0 VPSHUFD $0x4e, Y5, Y5 - VPADDQ Y1, Y0, Y1 VPADDQ Y1, Y3, Y1 - VPADDQ Y2, Y5, Y2 VPADDQ Y2, Y6, Y2 + VPADDQ Y1, Y0, Y1 + VPADDQ Y2, Y5, Y2 ADDQ $0x00000040, CX SUBQ $0x00000040, SI ADDQ $0x00000008, BX @@ -320,10 +327,10 @@ finalize: VPMULUDQ Y6, Y7, Y6 VPSHUFD $0x4e, Y0, Y0 VPSHUFD $0x4e, Y5, Y5 - VPADDQ Y1, Y0, Y1 VPADDQ Y1, Y3, Y1 - VPADDQ Y2, Y5, Y2 VPADDQ Y2, Y6, Y2 + VPADDQ Y1, Y0, Y1 + VPADDQ Y2, Y5, Y2 return: VMOVDQU Y1, (AX) @@ -341,229 +348,229 @@ TEXT ·accumBlockAVX2(SB), NOSPLIT, $0-24 VMOVDQU 32(AX), Y2 VMOVDQU prime_avx<>+0(SB), Y0 VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y6 - VPXOR (DX), Y3, Y4 - VPXOR 32(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y6 - VPXOR 8(DX), Y3, Y4 - VPXOR 40(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y6 - VPXOR 16(DX), Y3, Y4 - VPXOR 48(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y6 - VPXOR 24(DX), Y3, Y4 - VPXOR 56(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y6 - VPXOR 32(DX), Y3, Y4 - VPXOR 64(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y6 - VPXOR 40(DX), Y3, Y4 - VPXOR 72(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y6 - VPXOR 48(DX), Y3, Y4 - VPXOR 80(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y6 - VPXOR 56(DX), Y3, Y4 - VPXOR 88(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y6 - VPXOR 64(DX), Y3, Y4 - VPXOR 96(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y6 - VPXOR 72(DX), Y3, Y4 - VPXOR 104(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 + VMOVDQU 32(CX), Y4 + VPXOR (DX), Y3, Y5 + VPXOR 32(DX), Y4, Y7 + VPSHUFD $0x31, Y5, Y6 VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y5, Y6, Y5 VPMULUDQ Y7, Y8, Y7 VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 + VPSHUFD $0x4e, Y4, Y4 + VPADDQ Y1, Y5, Y1 VPADDQ Y2, Y7, Y2 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y6 - VPXOR 80(DX), Y3, Y4 - VPXOR 112(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y6 - VPXOR 88(DX), Y3, Y4 - VPXOR 120(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y6 - VPXOR 96(DX), Y3, Y4 - VPXOR 128(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y6 - VPXOR 104(DX), Y3, Y4 - VPXOR 136(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y6 - VPXOR 112(DX), Y3, Y4 - VPXOR 144(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y6 - VPXOR 120(DX), Y3, Y4 - VPXOR 152(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y8 + VPXOR 8(DX), Y5, Y6 + VPXOR 40(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y8 + VPXOR 16(DX), Y5, Y6 + VPXOR 48(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y8 + VPXOR 24(DX), Y5, Y6 + VPXOR 56(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y8 + VPXOR 32(DX), Y5, Y6 + VPXOR 64(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y8 + VPXOR 40(DX), Y5, Y6 + VPXOR 72(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y8 + VPXOR 48(DX), Y5, Y6 + VPXOR 80(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y8 + VPXOR 56(DX), Y5, Y6 + VPXOR 88(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y8 + VPXOR 64(DX), Y5, Y6 + VPXOR 96(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y8 + VPXOR 72(DX), Y5, Y6 + VPXOR 104(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y8 + VPXOR 80(DX), Y5, Y6 + VPXOR 112(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y8 + VPXOR 88(DX), Y5, Y6 + VPXOR 120(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y8 + VPXOR 96(DX), Y5, Y6 + VPXOR 128(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y8 + VPXOR 104(DX), Y5, Y6 + VPXOR 136(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y8 + VPXOR 112(DX), Y5, Y6 + VPXOR 144(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y8 + VPXOR 120(DX), Y5, Y6 + VPXOR 152(DX), Y8, Y9 + VPSHUFD $0x31, Y6, Y7 + VPSHUFD $0x31, Y9, Y10 + VPMULUDQ Y6, Y7, Y6 + VPMULUDQ Y9, Y10, Y9 + VPSHUFD $0x4e, Y5, Y5 + VPSHUFD $0x4e, Y8, Y8 + VPADDQ Y1, Y6, Y1 + VPADDQ Y2, Y9, Y2 + VPADDQ Y3, Y5, Y3 + VPADDQ Y4, Y8, Y4 VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 + VPADDQ Y2, Y4, Y2 VPSRLQ $0x2f, Y1, Y3 VPXOR Y1, Y3, Y3 VPXOR 128(DX), Y3, Y3 diff --git a/vendor/modules.txt b/vendor/modules.txt index 7ade93e154..94c5737858 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -36,14 +36,14 @@ github.com/CortexFoundation/inference/synapse # github.com/CortexFoundation/merkletree v0.0.0-20230724124840-b6e80265a137 ## explicit; go 1.19 github.com/CortexFoundation/merkletree -# github.com/CortexFoundation/robot v1.0.7-0.20230924130929-01afc8203c14 +# github.com/CortexFoundation/robot v1.0.7-0.20230924205950-05c3925242ed ## explicit; go 1.20 github.com/CortexFoundation/robot github.com/CortexFoundation/robot/backend # github.com/CortexFoundation/statik v0.0.0-20210315012922-8bb8a7b5dc66 ## explicit; go 1.16 github.com/CortexFoundation/statik -# github.com/CortexFoundation/torrentfs v1.0.55-0.20230924132644-328dc7f8815c +# github.com/CortexFoundation/torrentfs v1.0.55-0.20230925132950-bf8f925efaea ## explicit; go 1.21 github.com/CortexFoundation/torrentfs github.com/CortexFoundation/torrentfs/backend @@ -980,7 +980,7 @@ github.com/ucwong/golang-kv/leveldb github.com/ucwong/golang-kv/nutsdb github.com/ucwong/golang-kv/pebble github.com/ucwong/golang-kv/rosedb -# github.com/ucwong/shard v1.0.1-0.20230902205521-676c0c9c1dd2 +# github.com/ucwong/shard v1.0.1-0.20230924231639-2ac2d8ab288c ## explicit; go 1.20 github.com/ucwong/shard # github.com/urfave/cli/v2 v2.25.7 @@ -999,7 +999,7 @@ github.com/xujiajun/utils/strconv2 # github.com/yusufpapurcu/wmi v1.2.3 ## explicit; go 1.16 github.com/yusufpapurcu/wmi -# github.com/zeebo/xxh3 v1.0.3-0.20230105190837-8b1e819c6d4d +# github.com/zeebo/xxh3 v1.0.3-0.20230502181907-3808c706a06a ## explicit; go 1.17 github.com/zeebo/xxh3 # go.etcd.io/bbolt v1.3.7