From 9647b1a027cad3f5d4801b580cbb09a4e4b54cea Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Wed, 14 Aug 2024 10:22:51 +0200 Subject: [PATCH 1/7] Cleanup test of cu::DeviceMemory --- tests/test_cu.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_cu.cpp b/tests/test_cu.cpp index 8ada760..0013180 100644 --- a/tests/test_cu.cpp +++ b/tests/test_cu.cpp @@ -80,7 +80,7 @@ TEST_CASE("Test copying cu::DeviceMemory and cu::HostMemory using cu::Stream", } } -TEST_CASE("Test zeroing cu::DeviceMemory", "[zero]") { +TEST_CASE("Test cu::DeviceMemory", "[devicememory]") { cu::init(); cu::Device device(0); cu::Context context(CU_CTX_SCHED_BLOCKING_SYNC, device); @@ -134,7 +134,7 @@ TEST_CASE("Test zeroing cu::DeviceMemory", "[zero]") { CHECK(static_cast(memcmp(src, tgt, size))); } - SECTION("Test cu::RegisteredMemory") { + SECTION("Test cu::DeviceMemory memcpy asynchronously") { const size_t N = 3; const size_t size = N * sizeof(int); @@ -156,7 +156,7 @@ TEST_CASE("Test zeroing cu::DeviceMemory", "[zero]") { stream.memcpyDtoHAsync(tgt, mem, size); stream.synchronize(); - CHECK(data_in == data_out); + CHECK(static_cast(!memcmp(data_in.data(), data_out.data(), size))); } SECTION("Test cu::DeviceMemory with CU_MEMORYTYPE_DEVICE as host pointer") { From a8202f073dbb28e8a0052c2849f57eb733c844ec Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Wed, 14 Aug 2024 13:30:15 +0200 Subject: [PATCH 2/7] Add DeviceMemory::memset methods + tests --- include/cudawrappers/cu.hpp | 14 +++++++++++++- tests/test_cu.cpp | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/include/cudawrappers/cu.hpp b/include/cudawrappers/cu.hpp index c9f6b2f..79956f8 100644 --- a/include/cudawrappers/cu.hpp +++ b/include/cudawrappers/cu.hpp @@ -585,7 +585,19 @@ class DeviceMemory : public Wrapper { offset); } - void zero(size_t size) { checkCudaCall(cuMemsetD8(_obj, 0, size)); } + void memset(unsigned char value, size_t size) { + checkCudaCall(cuMemsetD8(_obj, value, size)); + } + + void memset(unsigned short value, size_t size) { + checkCudaCall(cuMemsetD16(_obj, value, size)); + } + + void memset(unsigned int value, size_t size) { + checkCudaCall(cuMemsetD32(_obj, value, size)); + } + + void zero(size_t size) { memset(static_cast(0), size); } const void *parameter() const // used to construct parameter list for launchKernel(); diff --git a/tests/test_cu.cpp b/tests/test_cu.cpp index 0013180..dd07792 100644 --- a/tests/test_cu.cpp +++ b/tests/test_cu.cpp @@ -204,6 +204,39 @@ TEST_CASE("Test cu::DeviceMemory", "[devicememory]") { } } +using TestTypes = std::tuple; +TEMPLATE_LIST_TEST_CASE("Test memset", "[memset]", TestTypes) { + cu::init(); + cu::Device device(0); + cu::Context context(CU_CTX_SCHED_BLOCKING_SYNC, device); + + SECTION("Test memset cu::DeviceMemory synchronously") { + const size_t N = 3; + const size_t size = N * sizeof(TestType); + cu::HostMemory a(size); + cu::HostMemory b(size); + TestType value = 0xAA; + + // Populate the memory with values + TestType* const a_ptr = static_cast(a); + TestType* const b_ptr = static_cast(b); + for (int i = 0; i < N; i++) { + a_ptr[i] = 0; + b_ptr[i] = value; + } + cu::DeviceMemory mem(size); + + cu::Stream stream; + stream.memcpyHtoDAsync(mem, a, size); + stream.synchronize(); + mem.memset(value, N); + stream.memcpyDtoHAsync(b, mem, size); + stream.synchronize(); + + CHECK(static_cast(memcmp(a, b, size))); + } +} + TEST_CASE("Test cu::Stream", "[stream]") { cu::init(); cu::Device device(0); From fb1a6f3957e4bdff7095868e003a0abde08160c2 Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Wed, 14 Aug 2024 13:46:56 +0200 Subject: [PATCH 3/7] Add Stream::memsetAsync methods + tests --- include/cudawrappers/cu.hpp | 14 +++++++++++++- tests/test_cu.cpp | 27 ++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/include/cudawrappers/cu.hpp b/include/cudawrappers/cu.hpp index 79956f8..032ab44 100644 --- a/include/cudawrappers/cu.hpp +++ b/include/cudawrappers/cu.hpp @@ -704,8 +704,20 @@ class Stream : public Wrapper { checkCudaCall(cuMemPrefetchAsync(devPtr, size, dstDevice, _obj)); } + void memsetAsync(DeviceMemory &devPtr, unsigned char value, size_t size) { + checkCudaCall(cuMemsetD8Async(devPtr, value, size, _obj)); + } + + void memsetAsync(DeviceMemory &devPtr, unsigned short value, size_t size) { + checkCudaCall(cuMemsetD16Async(devPtr, value, size, _obj)); + } + + void memsetAsync(DeviceMemory &devPtr, unsigned int value, size_t size) { + checkCudaCall(cuMemsetD32Async(devPtr, value, size, _obj)); + } + void zero(DeviceMemory &devPtr, size_t size) { - checkCudaCall(cuMemsetD8Async(devPtr, 0, size, _obj)); + memsetAsync(devPtr, static_cast(0), size); } void launchKernel(Function &function, unsigned gridX, unsigned gridY, diff --git a/tests/test_cu.cpp b/tests/test_cu.cpp index dd07792..6e6ed40 100644 --- a/tests/test_cu.cpp +++ b/tests/test_cu.cpp @@ -210,7 +210,32 @@ TEMPLATE_LIST_TEST_CASE("Test memset", "[memset]", TestTypes) { cu::Device device(0); cu::Context context(CU_CTX_SCHED_BLOCKING_SYNC, device); - SECTION("Test memset cu::DeviceMemory synchronously") { + SECTION("Test memset cu::DeviceMemory asynchronously") { + const size_t N = 3; + const size_t size = N * sizeof(TestType); + cu::HostMemory a(size); + cu::HostMemory b(size); + TestType value = 0xAA; + + // Populate the memory with values + TestType* const a_ptr = static_cast(a); + TestType* const b_ptr = static_cast(b); + for (int i = 0; i < N; i++) { + a_ptr[i] = 0; + b_ptr[i] = value; + } + cu::DeviceMemory mem(size); + + cu::Stream stream; + stream.memcpyHtoDAsync(mem, a, size); + stream.memsetAsync(mem, value, N); + stream.memcpyDtoHAsync(b, mem, size); + stream.synchronize(); + + CHECK(static_cast(memcmp(a, b, size))); + } + + SECTION("Test zeroing cu::DeviceMemory synchronously") { const size_t N = 3; const size_t size = N * sizeof(TestType); cu::HostMemory a(size); From dc44b6011d1b60b1bab138a1ee203e1b060bef07 Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Wed, 14 Aug 2024 13:53:25 +0200 Subject: [PATCH 4/7] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 764ab3b..f71dc19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ project adheres to [Semantic Versioning](http://semver.org/). - Added `cu::Device::getArch()` - Added `cu::DeviceMemory` constructor to create non-owning slice of another `cu::DeviceMemory` object +- Added `cu::DeviceMemory::memset()` +- Added `cu::Stream::memsetAsync()` ### Changed From 3f9316eda1e5952459e69e7fa143ebe414125469 Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Wed, 14 Aug 2024 14:06:31 +0200 Subject: [PATCH 5/7] Restore check --- tests/test_cu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cu.cpp b/tests/test_cu.cpp index 6e6ed40..8b71d04 100644 --- a/tests/test_cu.cpp +++ b/tests/test_cu.cpp @@ -156,7 +156,7 @@ TEST_CASE("Test cu::DeviceMemory", "[devicememory]") { stream.memcpyDtoHAsync(tgt, mem, size); stream.synchronize(); - CHECK(static_cast(!memcmp(data_in.data(), data_out.data(), size))); + CHECK(data_in == data_out); } SECTION("Test cu::DeviceMemory with CU_MEMORYTYPE_DEVICE as host pointer") { From 5669d7978608c5bd14380307e9a7d506f9b2804d Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Wed, 14 Aug 2024 14:33:32 +0200 Subject: [PATCH 6/7] Add missing include --- tests/test_cu.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cu.cpp b/tests/test_cu.cpp index 8b71d04..6d2c6c6 100644 --- a/tests/test_cu.cpp +++ b/tests/test_cu.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include From 79611916b804102ac76411c445b388ed2107e222 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 12:38:59 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_cu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cu.cpp b/tests/test_cu.cpp index 6d2c6c6..e383b0f 100644 --- a/tests/test_cu.cpp +++ b/tests/test_cu.cpp @@ -1,6 +1,6 @@ #include -#include #include +#include #include #include #include