Skip to content

Commit

Permalink
Add optimised memcpy, memset and memmove for the aarch64 memops exten…
Browse files Browse the repository at this point in the history
…sion

Summary:
Newer Arm CPUs will implement FEAT_MOPS, and on newer kernels this functionality will be exposed via the HWCAP2_MOPS bit in AT_HWCAP2.

Wire up support for the hardware-optimised memory operations ahead of silicon availability.

Reviewed By: Gownta

Differential Revision: D63754476

fbshipit-source-id: f067dc2942e3f7715da8a0fe281149c0269fd913
  • Loading branch information
Michael van der Westhuizen authored and facebook-github-bot committed Oct 3, 2024
1 parent 4a4a803 commit 4629171
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 9 deletions.
6 changes: 6 additions & 0 deletions folly/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,9 @@ cpp_library(
"ovr_config//cpu:x86_64": [
"memset.S",
],
"ovr_config//os:linux-arm64": [
"memset_select_aarch64.cpp",
],
}),
auto_headers = AutoHeaders.NONE,
headers = [],
Expand Down Expand Up @@ -808,6 +811,9 @@ cpp_library(
"ovr_config//cpu:x86_64": [
"memset.S",
],
"ovr_config//os:linux-arm64": [
"memset_select_aarch64.cpp",
],
}),
auto_headers = AutoHeaders.NONE,
headers = [],
Expand Down
6 changes: 6 additions & 0 deletions folly/external/aor/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ cpp_library(
srcs = [
"memcpy-advsimd.S",
"memcpy-armv8.S",
"memcpy-mops.S",
"memcpy_sve.S",
"memmove-mops.S",
],
headers = [
"asmdefs.h",
Expand All @@ -22,7 +24,9 @@ cpp_library(
srcs = [
"memcpy-advsimd.S",
"memcpy-armv8.S",
"memcpy-mops.S",
"memcpy_sve.S",
"memmove-mops.S",
],
headers = [
"asmdefs.h",
Expand All @@ -41,6 +45,7 @@ cpp_library(
name = "memset_aarch64",
srcs = [
"memset-advsimd.S",
"memset-mops.S",
],
headers = [
"asmdefs.h",
Expand All @@ -54,6 +59,7 @@ cpp_library(
name = "memset_aarch64-use",
srcs = [
"memset-advsimd.S",
"memset-mops.S",
],
headers = [
"asmdefs.h",
Expand Down
21 changes: 21 additions & 0 deletions folly/external/aor/memcpy-mops.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* memcpy using MOPS extension.
*
* Copyright (c) 2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/

#include "asmdefs.h"

/*
 * __folly_memcpy_aarch64_mops: memcpy implemented with the MOPS
 * cpyfp / cpyfm / cpyfe instruction triple.
 *
 * Register use as written below: x0 = dst, x1 = src, x2 = size
 * (matching the PTR_ARG / SIZE_ARG annotations; those macros come from
 * asmdefs.h and are not visible here).
 */
ENTRY (__folly_memcpy_aarch64_mops)
PTR_ARG (0)
PTR_ARG (1)
SIZE_ARG (2)

/*
 * The copy instructions use x3 with writeback ("!"), so they operate on a
 * copy of the destination pointer and x0 itself is never modified here --
 * presumably so that x0 still holds dst as the return value.
 */
mov x3, x0
/*
 * The three instructions are emitted as raw .inst words; the mnemonic each
 * word stands for is in the trailing comment. NOTE(review): presumably this
 * lets assemblers without MOPS support build the file -- confirm.
 */
.inst 0x19010443 /* cpyfp [x3]!, [x1]!, x2! */
.inst 0x19410443 /* cpyfm [x3]!, [x1]!, x2! */
.inst 0x19810443 /* cpyfe [x3]!, [x1]!, x2! */
ret

END (__folly_memcpy_aarch64_mops)
21 changes: 21 additions & 0 deletions folly/external/aor/memmove-mops.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* memmove using MOPS extension.
*
* Copyright (c) 2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/

#include "asmdefs.h"

/*
 * __folly_memmove_aarch64_mops: memmove implemented with the MOPS
 * cpyp / cpym / cpye instruction triple.
 *
 * Register use as written below: x0 = dst, x1 = src, x2 = length
 * (matching the PTR_ARG / SIZE_ARG annotations; those macros come from
 * asmdefs.h and are not visible here).
 *
 * NOTE(review): per the Arm architecture reference the cpyp/cpym/cpye forms
 * are the ones that tolerate overlapping buffers -- confirm.
 */
ENTRY (__folly_memmove_aarch64_mops)
PTR_ARG (0)
PTR_ARG (1)
SIZE_ARG (2)

/*
 * The copy instructions use x3 with writeback ("!"), so they operate on a
 * copy of the destination pointer and x0 itself is never modified here --
 * presumably so that x0 still holds dst as the return value.
 */
mov x3, x0
/* Raw .inst encodings; the intended mnemonic is in each trailing comment. */
.inst 0x1d010443 /* cpyp [x3]!, [x1]!, x2! */
.inst 0x1d410443 /* cpym [x3]!, [x1]!, x2! */
.inst 0x1d810443 /* cpye [x3]!, [x1]!, x2! */
ret

END (__folly_memmove_aarch64_mops)
9 changes: 2 additions & 7 deletions folly/external/aor/memset-advsimd.S
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#define dstend x4
#define zva_val x5

ENTRY (__folly_memset)
ENTRY (__folly_memset_aarch64_simd)
PTR_ARG (0)
SIZE_ARG (2)

Expand Down Expand Up @@ -113,9 +113,4 @@ L(no_zva_loop):
stp q0, q0, [dstend, -32]
ret

END (__folly_memset)

#ifdef FOLLY_MEMSET_IS_MEMSET
.weak memset
memset = __folly_memset
#endif
END (__folly_memset_aarch64_simd)
20 changes: 20 additions & 0 deletions folly/external/aor/memset-mops.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* memset using MOPS extension.
*
* Copyright (c) 2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/

#include "asmdefs.h"

/*
 * __folly_memset_aarch64_mops: memset implemented with the MOPS
 * setp / setm / sete instruction triple.
 *
 * Register use as written below: x0 = dest, x1 = fill value, x2 = count.
 * Only the pointer (argument 0) and the size (argument 2) carry
 * PTR_ARG / SIZE_ARG annotations; those macros come from asmdefs.h and are
 * not visible here.
 */
ENTRY (__folly_memset_aarch64_mops)
PTR_ARG (0)
SIZE_ARG (2)

/*
 * The set instructions use x3 with writeback ("!"), so they operate on a
 * copy of the destination pointer and x0 itself is never modified here --
 * presumably so that x0 still holds dest as the return value.
 */
mov x3, x0
/* Raw .inst encodings; the intended mnemonic is in each trailing comment. */
.inst 0x19c10443 /* setp [x3]!, x2!, x1 */
.inst 0x19c14443 /* setm [x3]!, x2!, x1 */
.inst 0x19c18443 /* sete [x3]!, x2!, x1 */
ret

END (__folly_memset_aarch64_mops)
36 changes: 34 additions & 2 deletions folly/memcpy_select_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,40 @@

#include <asm/hwcap.h> // @manual

#if defined(__has_include)
#if __has_include(<sys/ifunc.h>)
#include <sys/ifunc.h>
#endif
#endif

#if !defined(HWCAP2_MOPS)
#define HWCAP2_MOPS (1UL << 43)
#endif

extern "C" {

void* __folly_memcpy_aarch64(void* dst, const void* src, std::size_t size);
void* __folly_memcpy_aarch64_mops(void* dst, const void* src, std::size_t size);
void* __folly_memcpy_aarch64_simd(void* dst, const void* src, std::size_t size);
void* __folly_memcpy_aarch64_sve(void* dst, const void* src, std::size_t size);

void* __folly_memmove_aarch64(void* dst, const void* src, std::size_t len);
void* __folly_memmove_aarch64_mops(void* dst, const void* src, std::size_t len);
void* __folly_memmove_aarch64_simd(void* dst, const void* src, std::size_t len);
void* __folly_memmove_aarch64_sve(void* dst, const void* src, std::size_t len);

[[gnu::no_sanitize_address]]
decltype(&__folly_memcpy_aarch64) __folly_detail_memcpy_resolve(
uint64_t hwcaps) {
uint64_t hwcaps, const void* arg2) {
#if defined(_IFUNC_ARG_HWCAP)
if (hwcaps & _IFUNC_ARG_HWCAP && arg2 != nullptr) {
const __ifunc_arg_t* args = reinterpret_cast<const __ifunc_arg_t*>(arg2);
if (args->_hwcap2 & HWCAP2_MOPS) {
return __folly_memcpy_aarch64_mops;
}
}
#endif

if (hwcaps & HWCAP_SVE) {
return __folly_memcpy_aarch64_sve;
}
Expand All @@ -79,8 +101,18 @@ decltype(&__folly_memcpy_aarch64) __folly_detail_memcpy_resolve(
return __folly_memcpy_aarch64;
}

[[gnu::no_sanitize_address]]
decltype(&__folly_memmove_aarch64) __folly_detail_memmove_resolve(
uint64_t hwcaps) {
uint64_t hwcaps, const void* arg2) {
#if defined(_IFUNC_ARG_HWCAP)
if (hwcaps & _IFUNC_ARG_HWCAP && arg2 != nullptr) {
const __ifunc_arg_t* args = reinterpret_cast<const __ifunc_arg_t*>(arg2);
if (args->_hwcap2 & HWCAP2_MOPS) {
return __folly_memmove_aarch64_mops;
}
}
#endif

if (hwcaps & HWCAP_SVE) {
return __folly_memmove_aarch64_sve;
}
Expand Down
72 changes: 72 additions & 0 deletions folly/memset_select_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* How on earth does this work?
*
* See memcpy_select_aarch64.cpp for a full discussion.
*/

#include <cstddef>
#include <cstdint>

#if defined(__linux__) && defined(__aarch64__)

#include <asm/hwcap.h> // @manual

#if defined(__has_include)
#if __has_include(<sys/ifunc.h>)
#include <sys/ifunc.h>
#endif
#endif

#if !defined(HWCAP2_MOPS)
#define HWCAP2_MOPS (1UL << 43)
#endif

extern "C" {

// Candidate memset implementations chosen between by
// __folly_detail_memset_resolve. Only declared here; the symbol names match
// the assembly entry points in memset-mops.S and memset-advsimd.S.
void* __folly_memset_aarch64_mops(void* dest, int ch, std::size_t count);
void* __folly_memset_aarch64_simd(void* dest, int ch, std::size_t count);

// Resolver for the __folly_memset ifunc: returns the memset implementation to
// bind for this process.
//
// hwcaps - capability bits handed to the resolver; when the
//          _IFUNC_ARG_HWCAP flag is set in it, arg2 is treated as a pointer
//          to a __ifunc_arg_t carrying further capability words.
// arg2   - optional pointer to that __ifunc_arg_t (may be null).
//
// Returns the MOPS implementation when HWCAP2_MOPS is reported through
// arg2->_hwcap2, and the SIMD implementation in every other case, including
// builds where <sys/ifunc.h> did not provide _IFUNC_ARG_HWCAP.
[[gnu::no_sanitize_address]]
decltype(&__folly_memset_aarch64_simd) __folly_detail_memset_resolve(
    uint64_t hwcaps, const void* arg2) {
  // Start from the baseline and only upgrade when MOPS is positively detected.
  decltype(&__folly_memset_aarch64_simd) chosen = &__folly_memset_aarch64_simd;

#if defined(_IFUNC_ARG_HWCAP)
  const bool haveExtendedArgs =
      (hwcaps & _IFUNC_ARG_HWCAP) != 0 && arg2 != nullptr;
  if (haveExtendedArgs) {
    const auto* extended = static_cast<const __ifunc_arg_t*>(arg2);
    if ((extended->_hwcap2 & HWCAP2_MOPS) != 0) {
      chosen = &__folly_memset_aarch64_mops;
    }
  }
#endif

  return chosen;
}

// Public entry point. It has no body of its own: the gnu::ifunc attribute
// names __folly_detail_memset_resolve as the resolver that supplies the
// implementation this symbol is bound to.
[[gnu::ifunc("__folly_detail_memset_resolve")]]
void* __folly_memset(void* dest, int ch, std::size_t count);

#ifdef FOLLY_MEMSET_IS_MEMSET

// Weak alias that makes the plain `memset` symbol refer to __folly_memset.
// Only compiled when FOLLY_MEMSET_IS_MEMSET is defined (see the enclosing
// #ifdef).
[[gnu::weak, gnu::alias("__folly_memset")]]
void* memset(void* dest, int ch, std::size_t count);

#endif

} // extern "C"

#endif // defined(__linux__) && defined(__aarch64__)

0 comments on commit 4629171

Please sign in to comment.