diff --git a/CHANGELOG.md b/CHANGELOG.md index 764ab3b..f71dc19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ project adheres to [Semantic Versioning](http://semver.org/). - Added `cu::Device::getArch()` - Added `cu::DeviceMemory` constructor to create non-owning slice of another `cu::DeviceMemory` object +- Added `cu::DeviceMemory::memset()` +- Added `cu::Stream::memsetAsync()` ### Changed diff --git a/include/cudawrappers/cu.hpp b/include/cudawrappers/cu.hpp index c9f6b2f..032ab44 100644 --- a/include/cudawrappers/cu.hpp +++ b/include/cudawrappers/cu.hpp @@ -585,7 +585,19 @@ class DeviceMemory : public Wrapper { offset); } - void zero(size_t size) { checkCudaCall(cuMemsetD8(_obj, 0, size)); } + void memset(unsigned char value, size_t size) { + checkCudaCall(cuMemsetD8(_obj, value, size)); + } + + void memset(unsigned short value, size_t size) { + checkCudaCall(cuMemsetD16(_obj, value, size)); + } + + void memset(unsigned int value, size_t size) { + checkCudaCall(cuMemsetD32(_obj, value, size)); + } + + void zero(size_t size) { memset(static_cast<unsigned char>(0), size); } const void *parameter() const // used to construct parameter list for launchKernel(); @@ -692,8 +704,20 @@ class Stream : public Wrapper { checkCudaCall(cuMemPrefetchAsync(devPtr, size, dstDevice, _obj)); } + void memsetAsync(DeviceMemory &devPtr, unsigned char value, size_t size) { + checkCudaCall(cuMemsetD8Async(devPtr, value, size, _obj)); + } + + void memsetAsync(DeviceMemory &devPtr, unsigned short value, size_t size) { + checkCudaCall(cuMemsetD16Async(devPtr, value, size, _obj)); + } + + void memsetAsync(DeviceMemory &devPtr, unsigned int value, size_t size) { + checkCudaCall(cuMemsetD32Async(devPtr, value, size, _obj)); + } + void zero(DeviceMemory &devPtr, size_t size) { - checkCudaCall(cuMemsetD8Async(devPtr, 0, size, _obj)); + memsetAsync(devPtr, static_cast<unsigned char>(0), size); } void launchKernel(Function &function, unsigned gridX, unsigned gridY, diff --git a/tests/test_cu.cpp 
b/tests/test_cu.cpp index 8ada760..e383b0f 100644 --- a/tests/test_cu.cpp +++ b/tests/test_cu.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -80,7 +81,7 @@ TEST_CASE("Test copying cu::DeviceMemory and cu::HostMemory using cu::Stream", } } -TEST_CASE("Test zeroing cu::DeviceMemory", "[zero]") { +TEST_CASE("Test cu::DeviceMemory", "[devicememory]") { cu::init(); cu::Device device(0); cu::Context context(CU_CTX_SCHED_BLOCKING_SYNC, device); @@ -134,7 +135,7 @@ TEST_CASE("Test zeroing cu::DeviceMemory", "[zero]") { CHECK(static_cast<bool>(memcmp(src, tgt, size))); } - SECTION("Test cu::RegisteredMemory") { + SECTION("Test cu::DeviceMemory memcpy asynchronously") { const size_t N = 3; const size_t size = N * sizeof(int); @@ -204,6 +205,64 @@ TEST_CASE("Test zeroing cu::DeviceMemory", "[zero]") { } } +using TestTypes = std::tuple<unsigned char, unsigned short, unsigned int>; +TEMPLATE_LIST_TEST_CASE("Test memset", "[memset]", TestTypes) { + cu::init(); + cu::Device device(0); + cu::Context context(CU_CTX_SCHED_BLOCKING_SYNC, device); + + SECTION("Test memset cu::DeviceMemory asynchronously") { + const size_t N = 3; + const size_t size = N * sizeof(TestType); + cu::HostMemory a(size); + cu::HostMemory b(size); + TestType value = 0xAA; + + // Populate the memory with values + TestType* const a_ptr = static_cast<TestType*>(a); + TestType* const b_ptr = static_cast<TestType*>(b); + for (int i = 0; i < N; i++) { + a_ptr[i] = 0; + b_ptr[i] = value; + } + cu::DeviceMemory mem(size); + + cu::Stream stream; + stream.memcpyHtoDAsync(mem, a, size); + stream.memsetAsync(mem, value, N); + stream.memcpyDtoHAsync(b, mem, size); + stream.synchronize(); + + CHECK(static_cast<bool>(memcmp(a, b, size))); + } + + SECTION("Test zeroing cu::DeviceMemory synchronously") { + const size_t N = 3; + const size_t size = N * sizeof(TestType); + cu::HostMemory a(size); + cu::HostMemory b(size); + TestType value = 0xAA; + + // Populate the memory with values + TestType* const a_ptr = static_cast<TestType*>(a); + TestType* const b_ptr = static_cast<TestType*>(b); + for (int i = 
0; i < N; i++) { + a_ptr[i] = 0; + b_ptr[i] = value; + } + cu::DeviceMemory mem(size); + + cu::Stream stream; + stream.memcpyHtoDAsync(mem, a, size); + stream.synchronize(); + mem.memset(value, N); + stream.memcpyDtoHAsync(b, mem, size); + stream.synchronize(); + + CHECK(static_cast<bool>(memcmp(a, b, size))); + } +} + TEST_CASE("Test cu::Stream", "[stream]") { cu::init(); cu::Device device(0);