Skip to content

Commit

Permalink
Add Popcnt()
Browse files Browse the repository at this point in the history
goos: linux
goarch: amd64
pkg: github.com/bwesterb/go-and
cpu: 13th Gen Intel(R) Core(TM) i9-13900
BenchmarkPopcnt-32           	   34485	     32383 ns/op	30879.98 MB/s
BenchmarkPopcntGeneric-32    	   17019	     70787 ns/op	14126.98 MB/s
  • Loading branch information
Jille committed Jul 18, 2024
1 parent 1307755 commit f9d143e
Show file tree
Hide file tree
Showing 11 changed files with 200 additions and 0 deletions.
14 changes: 14 additions & 0 deletions and_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,17 @@ func andNot(dst, a, b []byte) {
}
andNotGeneric(dst[l:], a[l:], b[l:])
}

// popcnt returns the number of set bits in a.
//
// When hasPopcnt reports true, every complete 64-byte chunk of a is handed to
// the assembly routine popcntAVX2 and only the remaining tail goes through
// popcntGeneric. Otherwise the whole slice is counted by popcntGeneric.
func popcnt(a []byte) int {
	if !hasPopcnt() {
		return popcntGeneric(a)
	}

	// Number of whole 64-byte chunks available to the assembly routine.
	chunks := uint64(len(a)) / 64
	if chunks == 0 {
		// popcntAVX2 must not be called with a zero chunk count.
		return popcntGeneric(a)
	}

	// Byte offset of the first byte not covered by the assembly routine.
	tail := chunks * 64
	return popcntAVX2(&a[0], chunks) + popcntGeneric(a[tail:])
}
38 changes: 38 additions & 0 deletions and_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,41 @@ loop:
SUBQ $0x00000001, BX
JNZ loop
RET

// func popcntAVX2(a *byte, l uint64) int
// Requires: POPCNT
//
// Returns the number of set bits in the l consecutive 64-byte chunks that
// start at a. Despite the AVX2 suffix, the body only uses scalar POPCNTQ on
// general-purpose registers.
//
// l must be non-zero: the loop reads a chunk and decrements l before testing
// it, so l == 0 would read memory and wrap the counter instead of returning.
TEXT ·popcntAVX2(SB), NOSPLIT, $0-24
// AX = current read pointer, CX = remaining chunk count, DX = running total.
MOVQ a+0(FP), AX
MOVQ l+8(FP), CX
XORQ DX, DX

loop:
// Load the eight 64-bit words that make up one 64-byte chunk.
MOVQ (AX), BX
MOVQ 8(AX), SI
MOVQ 16(AX), DI
MOVQ 24(AX), R8
MOVQ 32(AX), R9
MOVQ 40(AX), R10
MOVQ 48(AX), R11
MOVQ 56(AX), R12
// Replace each word with its population count, in place.
POPCNTQ BX, BX
POPCNTQ SI, SI
POPCNTQ DI, DI
POPCNTQ R8, R8
POPCNTQ R9, R9
POPCNTQ R10, R10
POPCNTQ R11, R11
POPCNTQ R12, R12
// Accumulate the eight partial counts into the running total.
ADDQ BX, DX
ADDQ SI, DX
ADDQ DI, DX
ADDQ R8, DX
ADDQ R9, DX
ADDQ R10, DX
ADDQ R11, DX
ADDQ R12, DX
// Advance by one chunk (0x40 = 64 bytes) and loop while chunks remain.
ADDQ $0x00000040, AX
SUBQ $0x00000001, CX
JNZ loop
MOVQ DX, ret+16(FP)
RET
5 changes: 5 additions & 0 deletions and_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,8 @@ func andNot(dst, a, b []byte) {
// TODO: Write a NEON version for this
andNotGeneric(dst, a, b)
}

// popcnt returns the number of set bits in a.
//
// This variant has no assembly fast path yet, so the count always comes from
// popcntGeneric.
func popcnt(a []byte) int {
	// TODO: Write a NEON version for this
	count := popcntGeneric(a)
	return count
}
5 changes: 5 additions & 0 deletions and_stubs_amd64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions detect_popcnt_always.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//go:build amd64.v2

package and

// hasPopcnt reports whether the POPCNT-based assembly path may be used.
//
// This variant is compiled only under the amd64.v2 build constraint, where no
// runtime check is performed and the answer is unconditionally true.
func hasPopcnt() bool {
	const available = true
	return available
}
7 changes: 7 additions & 0 deletions detect_popcnt_never.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//go:build !amd64

package and

// hasPopcnt reports whether the POPCNT-based assembly path may be used.
//
// This variant is compiled only for non-amd64 targets (see the build
// constraint), where the answer is unconditionally false.
func hasPopcnt() bool {
	const available = false
	return available
}
9 changes: 9 additions & 0 deletions detect_popcnt_runtime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//go:build amd64 && !amd64.v2

package and

import "golang.org/x/sys/cpu"

// hasPopcnt reports whether the POPCNT-based assembly path may be used.
//
// This variant is compiled for amd64 builds without the amd64.v2 constraint,
// so the answer is taken from the CPU feature flags exposed by
// golang.org/x/sys/cpu.
func hasPopcnt() bool {
	supported := cpu.X86.HasPOPCNT
	return supported
}
4 changes: 4 additions & 0 deletions fallback.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ func or(dst, a, b []byte) {
// andNot is the fallback implementation selected by this file: it forwards
// dst, a and b unchanged to andNotGeneric and does no work of its own.
func andNot(dst, a, b []byte) {
andNotGeneric(dst, a, b)
}

// popcnt returns the number of set bits in a.
//
// This is the fallback variant: it has no accelerated path and always
// delegates to popcntGeneric.
func popcnt(a []byte) int {
	count := popcntGeneric(a)
	return count
}
38 changes: 38 additions & 0 deletions internal/asm/src.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ func main() {
gen("and", VPAND, "Sets dst to the bitwise and of a and b")
gen("or", VPOR, "Sets dst to the bitwise or of a and b")
gen("andNot", VPANDN, "Sets dst to the bitwise and of not(a) and b")
genPopcnt()
Generate()
}

Expand Down Expand Up @@ -48,3 +49,40 @@ func gen(name string, op func(Op, Op, Op), doc string) {

RET()
}

// genPopcnt emits the assembly routine popcntAVX2(a *byte, l uint64) int.
//
// The generated loop processes one 64-byte chunk per iteration: it loads
// eight 64-bit words, takes POPCNTQ of each, and adds the eight partial
// counts to the return register. The loop counter is decremented before it is
// tested, so the generated routine must be called with l > 0.
func genPopcnt() {
	TEXT("popcntAVX2", NOSPLIT, "func(a *byte, l uint64) int")

	Pragma("noescape")

	// Doc sets the documentation of the function being generated. It must be
	// called only once per function: a later call replaces the earlier text.
	Doc("Counts the number of bits set in a, assuming a is 64*l bytes long")
	a := Load(Param("a"), GP64())
	l := Load(Param("l"), GP64())

	ret := GP64()

	// One source register and one result register per 64-bit word of a chunk.
	as := []Op{GP64(), GP64(), GP64(), GP64(), GP64(), GP64(), GP64(), GP64()}
	intermediates := []Op{GP64(), GP64(), GP64(), GP64(), GP64(), GP64(), GP64(), GP64()}

	// Zero the return register. (This was previously a second Doc call, which
	// overwrote the function documentation set above.)
	XORQ(ret, ret)

	Label("loop")

	// Load the whole chunk first, then count, then accumulate, so that the
	// independent loads and POPCNTs are grouped together.
	for i := 0; i < len(as); i++ {
		MOVQ(Mem{Base: a, Disp: 8 * i}, as[i])
	}
	for i := 0; i < len(as); i++ {
		POPCNTQ(as[i], intermediates[i])
	}
	for i := 0; i < len(as); i++ {
		ADDQ(intermediates[i], ret)
	}

	// Advance the pointer by one chunk and loop while chunks remain.
	ADDQ(U32(len(as)*8), a)
	SUBQ(U32(1), l)
	JNZ(LabelRef("loop"))

	Store(ret, ReturnIndex(0))
	RET()
}
22 changes: 22 additions & 0 deletions lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package and

import (
"encoding/binary"
"math/bits"
)

// Writes bitwise and of a and b to dst.
Expand Down Expand Up @@ -82,3 +83,24 @@ func andNotGeneric(dst, a, b []byte) {
dst[i] = (^a[i]) & b[i]
}
}

// Popcnt returns the number of bits set in a.
//
// An empty or nil slice yields 0.
func Popcnt(a []byte) int {
	return popcnt(a)
}

// popcntGeneric returns the number of set bits in a using only portable Go.
//
// The slice is consumed eight bytes at a time through bits.OnesCount64; any
// remaining bytes (fewer than eight) are counted one by one.
func popcntGeneric(a []byte) int {
	total := 0

	// Whole 64-bit words. Byte order does not affect the bit count.
	for len(a) >= 8 {
		total += bits.OnesCount64(binary.LittleEndian.Uint64(a))
		a = a[8:]
	}

	// Leftover tail bytes.
	for _, b := range a {
		total += bits.OnesCount8(b)
	}
	return total
}
51 changes: 51 additions & 0 deletions popcnt_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package and

import (
"math/rand/v2"
"testing"
)

// testPopcntAgainstGeneric fills a buffer of size bytes from a PCG source
// seeded with (0, 0) and fails the test immediately if Popcnt and
// popcntGeneric return different counts for it.
func testPopcntAgainstGeneric(t *testing.T, size int) {
	src := rand.New(rand.NewPCG(0, 0))
	buf := make([]byte, size)
	for idx := 0; idx < len(buf); idx++ {
		buf[idx] = uint8(src.UintN(256))
	}

	if got, want := Popcnt(buf), popcntGeneric(buf); got != want {
		t.Fatalf("Popcnt produced a different result from popcntGeneric at length %d: %d; want %d", size, got, want)
	}
}

// TestPopcntAgainstGeneric compares Popcnt with popcntGeneric at every
// power-of-two length from 1 to 1<<19, and for each of those at ten further
// lengths obtained by adding a random offset in [0, 100).
func TestPopcntAgainstGeneric(t *testing.T) {
	const (
		shifts  = 20 // lengths 1<<0 .. 1<<19
		offsets = 10 // extra randomly offset lengths per power of two
	)

	for shift := 0; shift < shifts; shift++ {
		base := 1 << shift
		testPopcntAgainstGeneric(t, base)
		for k := 0; k < offsets; k++ {
			testPopcntAgainstGeneric(t, base+rand.IntN(100))
		}
	}
}

// BenchmarkPopcnt measures Popcnt over a zero-filled 1,000,000-byte buffer.
// The timer is paused while the buffer is allocated, and SetBytes makes the
// benchmark report throughput.
func BenchmarkPopcnt(b *testing.B) {
	const size = 1000000

	b.StopTimer()
	buf := make([]byte, size)
	b.SetBytes(size)
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		_ = Popcnt(buf)
	}
}

// BenchmarkPopcntGeneric measures popcntGeneric over a zero-filled
// 1,000,000-byte buffer. The timer is paused while the buffer is allocated,
// and SetBytes makes the benchmark report throughput.
func BenchmarkPopcntGeneric(b *testing.B) {
	const size = 1000000

	b.StopTimer()
	buf := make([]byte, size)
	b.SetBytes(size)
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		_ = popcntGeneric(buf)
	}
}

0 comments on commit f9d143e

Please sign in to comment.