Skip to content

Commit

Permalink
arm64: memset
Browse files Browse the repository at this point in the history
  • Loading branch information
bwesterb committed Jul 24, 2024
1 parent 6df2f85 commit a9e633f
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
11 changes: 9 additions & 2 deletions and_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ func andNotNEON(dst, a, b *byte, l uint64)
// popcntNEON is declared without a Go body, so its implementation lives
// outside this file (presumably in and_arm64.s — TODO confirm).
// NOTE(review): judging by the name and signature it returns the number of
// set bits in l fixed-size blocks starting at a; confirm the block size and
// whether l may be zero against the assembly before relying on this.
//go:noescape
func popcntNEON(a *byte, l uint64) uint64

// memsetNEON stores the byte b into l consecutive 256-byte blocks starting
// at dst. It has no Go body; the implementation is the assembly routine of
// the same name in and_arm64.s.
//
// l is a count of 256-byte blocks, not a byte length, and must be non-zero:
// the assembly loop decrements the counter before testing it, so it always
// writes at least one block.
//go:noescape
func memsetNEON(dst *byte, l uint64, b byte)

func and(dst, a, b []byte) {
l := uint64(len(a)) >> 8
if l != 0 {
Expand Down Expand Up @@ -64,6 +67,10 @@ func popcnt(a []byte) int {
}

func memset(dst []byte, b byte) {
// TODO: Write a NEON version for this
memsetGeneric(dst, b)
l := uint64(len(dst)) >> 8
if l != 0 {
memsetNEON(&dst[0], l, b)
}
l <<= 8
memsetGeneric(dst[l:], b)
}
22 changes: 22 additions & 0 deletions and_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,25 @@ loop:
MOVD R0, ret+16(FP)

RET

// func memsetNEON(dst *byte, l uint64, b byte)
//
// Stores the byte b into l consecutive 256-byte blocks starting at dst.
// l is a block count, not a byte length: every loop iteration below issues
// four 64-byte stores. l must be non-zero, because the counter is decremented
// before it is tested; a zero count would wrap around instead of skipping
// the loop.
//
// Frame: no locals, 17 bytes of arguments (8 + 8 + 1), no results.
TEXT ·memsetNEON(SB), NOSPLIT, $0-17
// R1 = write pointer, R2 = remaining 256-byte blocks, R0 = fill byte.
MOVD dst+0(FP), R1
MOVD l+8(FP), R2
MOVB b+16(FP), R0

// Replicate the fill byte into all 16 byte lanes of V0, then copy V0 into
// V1-V3 (ORR of a register with itself is a plain copy) so that one
// four-register store writes 64 bytes of the pattern.
VDUP R0, V0.B16
VORR V0.B16, V0.B16, V1.B16
VORR V0.B16, V0.B16, V2.B16
VORR V0.B16, V0.B16, V3.B16

loop:
// Each VST1.P stores V0-V3 (64 bytes) at R1 and then advances R1 by 64,
// so the four stores together fill one 256-byte block.
VST1.P [ V0.B16, V1.B16, V2.B16, V3.B16], 64(R1)
VST1.P [ V0.B16, V1.B16, V2.B16, V3.B16], 64(R1)
VST1.P [ V0.B16, V1.B16, V2.B16, V3.B16], 64(R1)
VST1.P [ V0.B16, V1.B16, V2.B16, V3.B16], 64(R1)

// One block done: decrement the block counter and repeat until it is zero.
SUBS $1, R2, R2
CBNZ R2, loop

RET

0 comments on commit a9e633f

Please sign in to comment.