diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index 0ce102ed9..eea84d55c 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -40,7 +40,9 @@ set(TEST_SRC hipMemcpyParam2D.cc hipMemcpyParam2DAsync.cc hipMemcpy2D.cc + hipMemcpy2D_old.cc hipMemcpy2DAsync.cc + hipMemcpy2DAsync_old.cc hipMemcpy2DFromArray.cc hipMemcpy2DFromArray_old.cc hipMemcpy2DFromArrayAsync.cc diff --git a/catch/unit/memory/hipMemcpy2D.cc b/catch/unit/memory/hipMemcpy2D.cc index d7e21e42f..2ae89fc5c 100644 --- a/catch/unit/memory/hipMemcpy2D.cc +++ b/catch/unit/memory/hipMemcpy2D.cc @@ -1,496 +1,151 @@ /* -Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * @addtogroup hipMemcpy2D hipMemcpy2D - * @{ - * @ingroup MemcpyTest - * `hipMemcpy2D(void* dst, size_t dpitch, const void* src, - * size_t spitch, size_t width, size_t height, - * hipMemcpyKind kind)` - - * Copies data between host and device. - */ - -// Testcase Description: -// 1) Verifies the working of Memcpy2D API negative scenarios by -// Pass NULL to destination pointer -// Pass NULL to Source pointer -// Pass width greater than spitch/dpitch -// 2) Verifies hipMemcpy2D API by -// pass 0 to destionation pitch -// pass 0 to source pitch -// pass 0 to width -// pass 0 to height -// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by -// performing D2H, D2D and H2D memory kind copies on same GPU -// 4) Verifies working of Memcpy2D API for the following scenarios -// H2D-D2D-D2H on host and device memory -// H2D-D2D-D2H on pinned host and device memory -// H2D-D2D-D2H functionalities where memory is allocated in GPU-0 -// and API is triggered from GPU-1 +#include "memcpy2d_tests_common.hh" #include -#include - -static constexpr auto NUM_W{16}; -static constexpr auto NUM_H{16}; -static constexpr auto COLUMNS{8}; -static constexpr auto ROWS{8}; +#include +#include +#include -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2D API on same GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory +TEST_CASE("Unit_hipMemcpy2D_Positive_Basic") { + constexpr bool async = false; - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "B_d" using D2D copy - "B_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ + SECTION("Device to Host") { Memcpy2DDeviceToHostShell(hipMemcpy2D); } -TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - // 1 refers to pinned host memory - auto mem_type = GENERATE(0, 1); - HIP_CHECK(hipSetDevice(0)); - TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, - *B_d{nullptr}; - size_t pitch_A, pitch_B; - size_t width{NUM_W * sizeof(TestType)}; - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + SECTION("Device to Device") { + SECTION("Peer access disabled") { Memcpy2DDeviceToDeviceShell(hipMemcpy2D); } + SECTION("Peer access enabled") { Memcpy2DDeviceToDeviceShell(hipMemcpy2D); } } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - // Host to Device - HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyHostToDevice)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice)); - // hipMemcpy2D Device to Host - HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost)); + SECTION("Host to Device") { Memcpy2DHostToDeviceShell(hipMemcpy2D); } - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } + SECTION("Host to Host") { Memcpy2DHostToHostShell(hipMemcpy2D); } } -/** - * Test Description - * ------------------------ - * - This testcase performs the following scenarios of hipMemcpy2D API on same GPU. - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - The src and dst input pointers to hipMemCpy2D add an offset to the pointers - returned by the allocation functions. - - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "B_d" using D2D copy - "B_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ +TEST_CASE("Unit_hipMemcpy2D_Positive_Synchronization_Behavior") { + HIP_CHECK(hipDeviceSynchronize()); -TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - // 1 refers to pinned host memory - auto mem_type = GENERATE(0, 1); - HIP_CHECK(hipSetDevice(0)); - TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, - *B_d{nullptr}; - size_t pitch_A, pitch_B; - size_t width{NUM_W * sizeof(TestType)}; + SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(hipMemcpy2D, true); } - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + SECTION("Device to Host") { + Memcpy2DDtoHPageableSyncBehavior(hipMemcpy2D, true); + Memcpy2DDtoHPinnedSyncBehavior(hipMemcpy2D, true); } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - // Host to Device - HIP_CHECK(hipMemcpy2D(A_d+COLUMNS*sizeof(TestType), pitch_A, A_h, - COLUMNS*sizeof(TestType), COLUMNS*sizeof(TestType), - ROWS, hipMemcpyHostToDevice)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2D(B_d+COLUMNS*sizeof(TestType), pitch_B, - A_d+COLUMNS*sizeof(TestType), - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice)); - - // hipMemcpy2D Device to Host - HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), - B_d+COLUMNS*sizeof(TestType), pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost)); - - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); + SECTION("Device to Device") { +#if HT_NVIDIA + Memcpy2DDtoDSyncBehavior(hipMemcpy2D, false); +#else + Memcpy2DDtoDSyncBehavior(hipMemcpy2D, true); +#endif } -} - -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2D API on Peer GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - 3. Device context change where memory is allocated in GPU-0 - and API is trigerred from GPU-1 - - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "X_d" using D2D copy - "X_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - auto mem_type = GENERATE(0, 1); - int numDevices = 0; - int canAccessPeer = 0; - TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - if (canAccessPeer) { - HIP_CHECK(hipSetDevice(0)); - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); - } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - char *X_d{nullptr}; - size_t pitch_X; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); - - // Change device - HIP_CHECK(hipSetDevice(1)); - // Host to Device - HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice)); - - // Device to Device - HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice)); - - // Device to Host - HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), X_d, - pitch_X, COLUMNS*sizeof(TestType), ROWS, hipMemcpyDeviceToHost)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } - HIP_CHECK(hipFree(X_d)); - } else { - SUCCEED("Machine does not seem to have P2P"); - } - } else { - SUCCEED("skipped the testcase as no of devices is less than 2"); - } +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232 + SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(hipMemcpy2D, true); } +#endif } -/** - * Test Description - * ------------------------ - * - This Testcase verifies the null size checks of hipMemcpy2D API - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2D_SizeCheck") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2D API where Source Pitch is zero") { - REQUIRE(hipMemcpy2D(A_h, 0, A_d, - pitch_A, NUM_W, NUM_H, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API where Destination Pitch is zero") { - REQUIRE(hipMemcpy2D(A_h, width, A_d, - 0, NUM_W, NUM_H, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API where height is zero") { - REQUIRE(hipMemcpy2D(A_h, width, A_d, - pitch_A, NUM_W, 0, - hipMemcpyDeviceToHost) == hipSuccess); - } - - SECTION("hipMemcpy2D API where width is zero") { - REQUIRE(hipMemcpy2D(A_h, width, A_d, - pitch_A, 0, NUM_H, - hipMemcpyDeviceToHost) == hipSuccess); - } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - free(A_h); +TEST_CASE("Unit_hipMemcpy2D_Positive_Parameters") { + constexpr bool async = false; + Memcpy2DZeroWidthHeight(hipMemcpy2D); } -/** - * Test Description - * ------------------------ - * - This Testcase verifies all the negative scenarios of hipMemcpy2D API - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2D_Negative") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2D API by Passing nullptr to destination") { - REQUIRE(hipMemcpy2D(nullptr, width, A_d, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API by Passing nullptr to destination") { - REQUIRE(hipMemcpy2D(nullptr, width, nullptr, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API where width is greater than destination pitch") { - REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A, - COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost) != hipSuccess); - } +TEST_CASE("Unit_hipMemcpy2D_Negative_Parameters") { + constexpr size_t cols = 128; + constexpr size_t rows = 128; - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - free(A_h); -} + constexpr auto NegativeTests = [](void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + SECTION("dst == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2D(nullptr, dpitch, src, spitch, width, height, kind), + hipErrorInvalidValue); + } -static void hipMemcpy2D_Basic_Size_Test(size_t inc) { - constexpr int defaultProgramSize = 256 * 1024 * 1024; - constexpr int N = 2; - constexpr int value = 42; - int *in, *out, *dev; - size_t newSize = 0, inp = 0; - size_t size = sizeof(int) * N * inc; + SECTION("src == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, nullptr, spitch, width, height, kind), + hipErrorInvalidValue); + } - size_t free, total; - HIP_CHECK(hipMemGetInfo(&free, &total)); + SECTION("dpitch < width") { + HIP_CHECK_ERROR(hipMemcpy2D(dst, width - 1, src, spitch, width, height, kind), + hipErrorInvalidPitchValue); + } - if ( free < 2 * size ) - newSize = ( free - defaultProgramSize ) / 2; - else - newSize = size; + SECTION("spitch < width") { + HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, src, width - 1, width, height, kind), + hipErrorInvalidPitchValue); + } - INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes."); - INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); - INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); + SECTION("dpitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR( + hipMemcpy2D(dst, static_cast(attr) + 1, src, spitch, width, height, kind), + hipErrorInvalidValue); + } - HIP_CHECK(hipHostMalloc(&in, newSize)); - HIP_CHECK(hipHostMalloc(&out, newSize)); - HIP_CHECK(hipMalloc(&dev, newSize)); + SECTION("spitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR( + hipMemcpy2D(dst, dpitch, src, static_cast(attr) + 1, width, height, kind), + hipErrorInvalidValue); + } - inp = newSize / (sizeof(int) * N); - for (size_t i=0; i < N; i++) { - in[i * inp] = value; +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234 + SECTION("Invalid MemcpyKind") { + HIP_CHECK_ERROR( + hipMemcpy2D(dst, dpitch, src, spitch, width, height, static_cast(-1)), + hipErrorInvalidMemcpyDirection); + } +#endif + }; + + SECTION("Host to Device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice); } - size_t pitch = sizeof(int) * inp; - - HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int), - N, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int), - N, hipMemcpyDeviceToHost)); - - for (size_t i=0; i < N; i++) { - REQUIRE(out[i * inp] == value); + SECTION("Device to Host") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost); } - HIP_CHECK(hipFree(dev)); - HIP_CHECK(hipHostFree(in)); - HIP_CHECK(hipHostFree(out)); -} - -/** - * Test Description - * ------------------------ - * - This testcase performs multidevice size check on hipMemcpy2D API - 1. Verify hipMemcpy2D with 1 << 20 size - 2. Verify hipMemcpy2D with 1 << 21 size - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") { - CHECK_IMAGE_SUPPORT - size_t input = 1 << 20; - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - - for (int i=0; i < numDevices; i++) { - HIP_CHECK(hipSetDevice(i)); + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int), + cols * sizeof(int), rows, hipMemcpyHostToHost); + } - SECTION("Verify hipMemcpy2D with 1 << 20 size") { - hipMemcpy2D_Basic_Size_Test(input); - } - SECTION("Verify hipMemcpy2D with 1 << 21 size") { - input <<= 1; - hipMemcpy2D_Basic_Size_Test(input); - } + SECTION("Device to Device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D dst_alloc(cols, rows); + NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice); } } diff --git a/catch/unit/memory/hipMemcpy2DAsync.cc b/catch/unit/memory/hipMemcpy2DAsync.cc index 1ca39bd6c..4639993b9 100644 --- a/catch/unit/memory/hipMemcpy2DAsync.cc +++ b/catch/unit/memory/hipMemcpy2DAsync.cc @@ -1,555 +1,188 @@ /* -Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync - * @{ - * @ingroup MemcpyTest - * `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, - * size_t spitch, size_t width, size_t height, - * hipMemcpyKind kind, hipStream_t stream = 0 )` - - * Copies data between host and device. - */ - -// Testcase Description: -// 1) Verifies the working of Memcpy2DAsync API negative scenarios by -// Pass NULL to destination pointer -// Pass NULL to Source pointer -// Pass width greater than spitch/dpitch -// 2) Verifies hipMemcpy2DAsync API by -// pass 0 to destionation pitch -// pass 0 to source pitch -// pass 0 to width -// pass 0 to height -// 3) Verifies working of Memcpy2DAsync API on host memory -// and pinned host memory by -// performing D2H, D2D and H2D memory kind copies on same GPU -// 4) Verifies working of Memcpy2DAsync API on host memory -// and pinned host memory by -// performing D2H, D2D and H2D memory kind copies on peer GPU -// 5) Verifies working of Memcpy2DAsync API where memory is allocated -// in GPU-0 and stream is created on GPU-1 +#include "memcpy2d_tests_common.hh" #include -#include - -static constexpr auto NUM_W{16}; -static constexpr auto NUM_H{16}; -static constexpr auto COLUMNS{6}; -static constexpr auto ROWS{6}; +#include +#include +#include -/** - * Test Description - * ------------------------ - * - This performs the following scenarios of hipMemcpy2DAsync API on same GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory +TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic") { + using namespace std::placeholders; - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "B_d" using D2D copy - "B_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ + constexpr bool async = true; -TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - // 1 refers to pinned host memory - auto mem_type = GENERATE(0, 1); - HIP_CHECK(hipSetDevice(0)); - TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, - *B_d{nullptr}; - size_t pitch_A, pitch_B; - size_t width{NUM_W * sizeof(TestType)}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); + const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created); + const StreamGuard stream_guard(stream_type); + const hipStream_t stream = stream_guard.stream(); - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + SECTION("Device to Host") { + Memcpy2DDeviceToHostShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - SECTION("Calling Async apis with stream object created by user") { - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyHostToDevice, stream)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice, stream)); - - // hipMemcpy2DAsync Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); + SECTION("Device to Device") { + SECTION("Peer access disabled") { + Memcpy2DDeviceToDeviceShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); + } + SECTION("Peer access enabled") { + Memcpy2DDeviceToDeviceShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); + } } - SECTION("Calling Async apis with hipStreamPerThread") { - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyHostToDevice, hipStreamPerThread)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, pitch_A, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToDevice, hipStreamPerThread)); - // hipMemcpy2DAsync Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, hipStreamPerThread)); - HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + SECTION("Host to Device") { + Memcpy2DHostToDeviceShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); } - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); + SECTION("Host to Host") { + Memcpy2DHostToHostShell(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), + stream); } - HIP_CHECK(hipStreamDestroy(stream)); } -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "X_d" using D2D copy - "X_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ - -TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - auto mem_type = GENERATE(0, 1); - int numDevices = 0; - int canAccessPeer = 0; - TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - hipStream_t stream; - - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - if (canAccessPeer) { - HIP_CHECK(hipSetDevice(0)); - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); - } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); - - // Change device - HIP_CHECK(hipSetDevice(1)); +TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Synchronization_Behavior") { + using namespace std::placeholders; - char *X_d{nullptr}; - size_t pitch_X; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); + HIP_CHECK(hipDeviceSynchronize()); - // Device to Device - HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice, stream)); - - // Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d, - pitch_X, COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } - HIP_CHECK(hipFree(X_d)); - HIP_CHECK(hipStreamDestroy(stream)); - } else { - SUCCEED("Machine does not seem to have P2P"); - } - } else { - SUCCEED("skipped the testcase as no of devices is less than 2"); + SECTION("Host to Device") { + Memcpy2DHtoDSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + false); } -} - -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - Memory is allocated in GPU-0 and Stream is created in GPU-1 - - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "X_d" using D2D copy - "X_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ - -TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - auto mem_type = GENERATE(0, 1); - int numDevices = 0; - int canAccessPeer = 0; - TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - hipStream_t stream; - - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - if (canAccessPeer) { - HIP_CHECK(hipSetDevice(0)); - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); - } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - char *X_d{nullptr}; - size_t pitch_X; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - // Change device - HIP_CHECK(hipSetDevice(1)); - HIP_CHECK(hipStreamCreate(&stream)); - - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); - - // Device to Device - HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice, stream)); - - // Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d, - pitch_X, COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } - HIP_CHECK(hipFree(X_d)); - HIP_CHECK(hipStreamDestroy(stream)); - } else { - SUCCEED("Machine does not seem to have P2P"); - } - } else { - SUCCEED("skipped the testcase as no of devices is less than 2"); +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233 + SECTION("Device to Pageable Host") { + Memcpy2DDtoHPageableSyncBehavior( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), true); } -} - -/** - * Test Description - * ------------------------ - * - This testcase verifies the null checks of hipMemcpy2DAsync API - 1. hipMemcpy2DAsync API where Source Pitch is zero - 2. hipMemcpy2DAsync API where Destination Pitch is zero - 3. hipMemcpy2DAsync API where height is zero - 4. hipMemcpy2DAsync API where width is zero - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ +#endif -TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2DAsync API where Source Pitch is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d, - pitch_A, NUM_W, NUM_H, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, - 0, NUM_W, NUM_H, - hipMemcpyDeviceToHost, stream) != hipSuccess); + SECTION("Device to Pinned Host") { + Memcpy2DDtoHPinnedSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + false); } - SECTION("hipMemcpy2DAsync API where height is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, - pitch_A, NUM_W, 0, - hipMemcpyDeviceToHost, stream) == hipSuccess); + SECTION("Device to Device") { + Memcpy2DDtoDSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + false); } - SECTION("hipMemcpy2DAsync API where width is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, - pitch_A, 0, NUM_H, - hipMemcpyDeviceToHost, stream) == hipSuccess); +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233 + SECTION("Host to Host") { + Memcpy2DHtoHSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + true); } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - free(A_h); +#endif } -/** - * Test Description - * ------------------------ - * - This testcase performs the negative scenarios of hipMemcpy2DAsync API - 1. hipMemcpy2DAsync API by Passing nullptr to destination - 2. hipMemcpy2DAsync API by Passing nullptr to source - 3. hipMemcpy2DAsync API where width is > destination pitch - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ - -TEST_CASE("Unit_hipMemcpy2DAsync_Negative") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") { - REQUIRE(hipMemcpy2DAsync(nullptr, width, A_d, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - SECTION("hipMemcpy2DAsync API by Passing nullptr to source") { - REQUIRE(hipMemcpy2DAsync(A_h, width, nullptr, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - SECTION("hipMemcpy2DAsync API where width is > destination pitch") { - REQUIRE(hipMemcpy2DAsync(A_h, 10, A_d, pitch_A, - COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipStreamDestroy(stream)); - free(A_h); +TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Parameters") { + using namespace std::placeholders; + constexpr bool async = true; + Memcpy2DZeroWidthHeight(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr)); } -static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) { - constexpr int defaultProgramSize = 256 * 1024 * 1024; - constexpr int N = 2; - constexpr int value = 42; - int *in, *out, *dev; - size_t newSize = 0, inp = 0; - size_t size = sizeof(int) * N * inc; - - size_t free, total; - HIP_CHECK(hipMemGetInfo(&free, &total)); - - if ( free < 2 * size ) - newSize = ( free - defaultProgramSize ) / 2; - else - newSize = size; - - INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes."); - INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); - INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); +TEST_CASE("Unit_hipMemcpy2DAsync_Negative_Parameters") { + constexpr size_t cols = 128; + constexpr size_t rows = 128; - HIP_CHECK(hipHostMalloc(&in, newSize)); - HIP_CHECK(hipHostMalloc(&out, newSize)); - HIP_CHECK(hipMalloc(&dev, newSize)); + constexpr auto NegativeTests = [](void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + SECTION("dst == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(nullptr, dpitch, src, spitch, width, height, kind, nullptr), + hipErrorInvalidValue); + } + SECTION("src == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, nullptr, spitch, width, height, kind, nullptr), + hipErrorInvalidValue); + } + SECTION("dpitch < width") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, width - 1, src, spitch, width, height, kind, nullptr), + hipErrorInvalidPitchValue); + } + SECTION("spitch < width") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, src, width - 1, width, height, kind, nullptr), + hipErrorInvalidPitchValue); + } + SECTION("dpitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, static_cast(attr) + 1, src, spitch, width, + height, kind, nullptr), + hipErrorInvalidValue); + } + SECTION("spitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, src, static_cast(attr) + 1, width, + height, kind, nullptr), + hipErrorInvalidValue); + } +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234 + SECTION("Invalid MemcpyKind") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, + static_cast(-1), nullptr), + hipErrorInvalidMemcpyDirection); + } +#endif +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-235 + SECTION("Invalid stream") { + StreamGuard stream_guard(Streams::created); + HIP_CHECK(hipStreamDestroy(stream_guard.stream())); + HIP_CHECK_ERROR( + hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream_guard.stream()), + hipErrorContextIsDestroyed); + } +#endif + }; - inp = newSize / (sizeof(int) * N); - for (size_t i=0; i < N; i++) { - in[i * inp] = value; + SECTION("Host to device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice); } - size_t pitch = sizeof(int) * inp; - - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int), - N, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int), - N, hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - for (size_t i=0; i < N; i++) { - REQUIRE(out[i * inp] == value); + SECTION("Device to host") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost); } - HIP_CHECK(hipFree(dev)); - HIP_CHECK(hipHostFree(in)); - HIP_CHECK(hipHostFree(out)); - HIP_CHECK(hipStreamDestroy(stream)); -} - -/** - * Test Description - * ------------------------ - * - This testcase performs multidevice size check on hipMemcpy2DAsync API - 1. Verify hipMemcpy2DAsync with 1 << 20 size - 2. Verify hipMemcpy2DAsync with 1 << 21 size - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") { - CHECK_IMAGE_SUPPORT - size_t input = 1 << 20; - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - - for (int i=0; i < numDevices; i++) { - HIP_CHECK(hipSetDevice(i)); + SECTION("Host to host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int), + cols * sizeof(int), rows, hipMemcpyHostToHost); + } - SECTION("Verify hipMemcpy2DAsync with 1 << 20 size") { - hipMemcpy2DAsync_Basic_Size_Test(input); - } - SECTION("Verify hipMemcpy2DAsync with 1 << 21 size") { - input <<= 1; - hipMemcpy2DAsync_Basic_Size_Test(input); - } + SECTION("Device to device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D dst_alloc(cols, rows); + NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice); } } diff --git a/catch/unit/memory/hipMemcpy2DAsync_old.cc b/catch/unit/memory/hipMemcpy2DAsync_old.cc new file mode 100644 index 000000000..1ca39bd6c --- /dev/null +++ b/catch/unit/memory/hipMemcpy2DAsync_old.cc @@ -0,0 +1,555 @@ +/* +Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync + * @{ + * @ingroup MemcpyTest + * `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, + * size_t spitch, size_t width, size_t height, + * hipMemcpyKind kind, hipStream_t stream = 0 )` - + * Copies data between host and device. + */ + +// Testcase Description: +// 1) Verifies the working of Memcpy2DAsync API negative scenarios by +// Pass NULL to destination pointer +// Pass NULL to Source pointer +// Pass width greater than spitch/dpitch +// 2) Verifies hipMemcpy2DAsync API by +// pass 0 to destionation pitch +// pass 0 to source pitch +// pass 0 to width +// pass 0 to height +// 3) Verifies working of Memcpy2DAsync API on host memory +// and pinned host memory by +// performing D2H, D2D and H2D memory kind copies on same GPU +// 4) Verifies working of Memcpy2DAsync API on host memory +// and pinned host memory by +// performing D2H, D2D and H2D memory kind copies on peer GPU +// 5) Verifies working of Memcpy2DAsync API where memory is allocated +// in GPU-0 and stream is created on GPU-1 + +#include +#include + +static constexpr auto NUM_W{16}; +static constexpr auto NUM_H{16}; +static constexpr auto COLUMNS{6}; +static constexpr auto ROWS{6}; + +/** + * Test Description + * ------------------------ + * - This performs the following scenarios of hipMemcpy2DAsync API on same GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "B_d" using D2D copy + "B_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + // 1 refers to pinned host memory + auto mem_type = GENERATE(0, 1); + HIP_CHECK(hipSetDevice(0)); + TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, + *B_d{nullptr}; + size_t pitch_A, pitch_B; + size_t width{NUM_W * sizeof(TestType)}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + SECTION("Calling Async apis with stream object created by user") { + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice, stream)); + + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, stream)); + + // hipMemcpy2DAsync Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } + SECTION("Calling Async apis with hipStreamPerThread") { + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice, hipStreamPerThread)); + + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, pitch_A, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToDevice, hipStreamPerThread)); + + // hipMemcpy2DAsync Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** + * Test Description + * ------------------------ + * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "X_d" using D2D copy + "X_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + auto mem_type = GENERATE(0, 1); + int numDevices = 0; + int canAccessPeer = 0; + TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + hipStream_t stream; + + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); + if (canAccessPeer) { + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); + + // Change device + HIP_CHECK(hipSetDevice(1)); + + char *X_d{nullptr}; + size_t pitch_X; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + + // Device to Device + HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, stream)); + + // Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d, + pitch_X, COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipFree(X_d)); + HIP_CHECK(hipStreamDestroy(stream)); + } else { + SUCCEED("Machine does not seem to have P2P"); + } + } else { + SUCCEED("skipped the testcase as no of devices is less than 2"); + } +} + +/** + * Test Description + * ------------------------ + * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + Memory is allocated in GPU-0 and Stream is created in GPU-1 + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "X_d" using D2D copy + "X_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + auto mem_type = GENERATE(0, 1); + int numDevices = 0; + int canAccessPeer = 0; + TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + hipStream_t stream; + + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); + if (canAccessPeer) { + HIP_CHECK(hipSetDevice(0)); + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + char *X_d{nullptr}; + size_t pitch_X; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Change device + HIP_CHECK(hipSetDevice(1)); + HIP_CHECK(hipStreamCreate(&stream)); + + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); + + // Device to Device + HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, stream)); + + // Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d, + pitch_X, COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipFree(X_d)); + HIP_CHECK(hipStreamDestroy(stream)); + } else { + SUCCEED("Machine does not seem to have P2P"); + } + } else { + SUCCEED("skipped the testcase as no of devices is less than 2"); + } +} + +/** + * Test Description + * ------------------------ + * - This testcase verifies the null checks of hipMemcpy2DAsync API + 1. hipMemcpy2DAsync API where Source Pitch is zero + 2. hipMemcpy2DAsync API where Destination Pitch is zero + 3. hipMemcpy2DAsync API where height is zero + 4. hipMemcpy2DAsync API where width is zero + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2DAsync API where Source Pitch is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d, + pitch_A, NUM_W, NUM_H, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, + 0, NUM_W, NUM_H, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where height is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, + pitch_A, NUM_W, 0, + hipMemcpyDeviceToHost, stream) == hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where width is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, + pitch_A, 0, NUM_H, + hipMemcpyDeviceToHost, stream) == hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + free(A_h); +} + +/** + * Test Description + * ------------------------ + * - This testcase performs the negative scenarios of hipMemcpy2DAsync API + 1. hipMemcpy2DAsync API by Passing nullptr to destination + 2. hipMemcpy2DAsync API by Passing nullptr to source + 3. hipMemcpy2DAsync API where width is > destination pitch + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEST_CASE("Unit_hipMemcpy2DAsync_Negative") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") { + REQUIRE(hipMemcpy2DAsync(nullptr, width, A_d, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API by Passing nullptr to source") { + REQUIRE(hipMemcpy2DAsync(A_h, width, nullptr, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where width is > destination pitch") { + REQUIRE(hipMemcpy2DAsync(A_h, 10, A_d, pitch_A, + COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipStreamDestroy(stream)); + free(A_h); +} + +static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) { + constexpr int defaultProgramSize = 256 * 1024 * 1024; + constexpr int N = 2; + constexpr int value = 42; + int *in, *out, *dev; + size_t newSize = 0, inp = 0; + size_t size = sizeof(int) * N * inc; + + size_t free, total; + HIP_CHECK(hipMemGetInfo(&free, &total)); + + if ( free < 2 * size ) + newSize = ( free - defaultProgramSize ) / 2; + else + newSize = size; + + INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes."); + INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); + INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); + + HIP_CHECK(hipHostMalloc(&in, newSize)); + HIP_CHECK(hipHostMalloc(&out, newSize)); + HIP_CHECK(hipMalloc(&dev, newSize)); + + inp = newSize / (sizeof(int) * N); + for (size_t i=0; i < N; i++) { + in[i * inp] = value; + } + + size_t pitch = sizeof(int) * inp; + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int), + N, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int), + N, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + for (size_t i=0; i < N; i++) { + REQUIRE(out[i * inp] == value); + } + + HIP_CHECK(hipFree(dev)); + HIP_CHECK(hipHostFree(in)); + HIP_CHECK(hipHostFree(out)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** + * Test Description + * ------------------------ + * - This testcase performs multidevice size check on hipMemcpy2DAsync API + 1. Verify hipMemcpy2DAsync with 1 << 20 size + 2. Verify hipMemcpy2DAsync with 1 << 21 size + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") { + CHECK_IMAGE_SUPPORT + size_t input = 1 << 20; + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + + for (int i=0; i < numDevices; i++) { + HIP_CHECK(hipSetDevice(i)); + + SECTION("Verify hipMemcpy2DAsync with 1 << 20 size") { + hipMemcpy2DAsync_Basic_Size_Test(input); + } + SECTION("Verify hipMemcpy2DAsync with 1 << 21 size") { + input <<= 1; + hipMemcpy2DAsync_Basic_Size_Test(input); + } + } +} diff --git a/catch/unit/memory/hipMemcpy2D_old.cc b/catch/unit/memory/hipMemcpy2D_old.cc new file mode 100644 index 000000000..d7e21e42f --- /dev/null +++ b/catch/unit/memory/hipMemcpy2D_old.cc @@ -0,0 +1,496 @@ +/* +Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemcpy2D hipMemcpy2D + * @{ + * @ingroup MemcpyTest + * `hipMemcpy2D(void* dst, size_t dpitch, const void* src, + * size_t spitch, size_t width, size_t height, + * hipMemcpyKind kind)` - + * Copies data between host and device. + */ + +// Testcase Description: +// 1) Verifies the working of Memcpy2D API negative scenarios by +// Pass NULL to destination pointer +// Pass NULL to Source pointer +// Pass width greater than spitch/dpitch +// 2) Verifies hipMemcpy2D API by +// pass 0 to destionation pitch +// pass 0 to source pitch +// pass 0 to width +// pass 0 to height +// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by +// performing D2H, D2D and H2D memory kind copies on same GPU +// 4) Verifies working of Memcpy2D API for the following scenarios +// H2D-D2D-D2H on host and device memory +// H2D-D2D-D2H on pinned host and device memory +// H2D-D2D-D2H functionalities where memory is allocated in GPU-0 +// and API is triggered from GPU-1 + +#include +#include + +static constexpr auto NUM_W{16}; +static constexpr auto NUM_H{16}; +static constexpr auto COLUMNS{8}; +static constexpr auto ROWS{8}; + +/** + * Test Description + * ------------------------ + * - This testcases performs the following scenarios of hipMemcpy2D API on same GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "B_d" using D2D copy + "B_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + // 1 refers to pinned host memory + auto mem_type = GENERATE(0, 1); + HIP_CHECK(hipSetDevice(0)); + TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, + *B_d{nullptr}; + size_t pitch_A, pitch_B; + size_t width{NUM_W * sizeof(TestType)}; + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Host to Device + HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice)); + + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice)); + + // hipMemcpy2D Device to Host + HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } +} + +/** + * Test Description + * ------------------------ + * - This testcase performs the following scenarios of hipMemcpy2D API on same GPU. + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + The src and dst input pointers to hipMemCpy2D add an offset to the pointers + returned by the allocation functions. + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "B_d" using D2D copy + "B_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + // 1 refers to pinned host memory + auto mem_type = GENERATE(0, 1); + HIP_CHECK(hipSetDevice(0)); + TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, + *B_d{nullptr}; + size_t pitch_A, pitch_B; + size_t width{NUM_W * sizeof(TestType)}; + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Host to Device + HIP_CHECK(hipMemcpy2D(A_d+COLUMNS*sizeof(TestType), pitch_A, A_h, + COLUMNS*sizeof(TestType), COLUMNS*sizeof(TestType), + ROWS, hipMemcpyHostToDevice)); + + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2D(B_d+COLUMNS*sizeof(TestType), pitch_B, + A_d+COLUMNS*sizeof(TestType), + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice)); + + // hipMemcpy2D Device to Host + HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), + B_d+COLUMNS*sizeof(TestType), pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost)); + + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } +} + +/** + * Test Description + * ------------------------ + * - This testcases performs the following scenarios of hipMemcpy2D API on Peer GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + 3. Device context change where memory is allocated in GPU-0 + and API is trigerred from GPU-1 + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "X_d" using D2D copy + "X_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + auto mem_type = GENERATE(0, 1); + int numDevices = 0; + int canAccessPeer = 0; + TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); + if (canAccessPeer) { + HIP_CHECK(hipSetDevice(0)); + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + char *X_d{nullptr}; + size_t pitch_X; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + + // Change device + HIP_CHECK(hipSetDevice(1)); + + // Host to Device + HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice)); + + // Device to Device + HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice)); + + // Device to Host + HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), X_d, + pitch_X, COLUMNS*sizeof(TestType), ROWS, hipMemcpyDeviceToHost)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipFree(X_d)); + } else { + SUCCEED("Machine does not seem to have P2P"); + } + } else { + SUCCEED("skipped the testcase as no of devices is less than 2"); + } +} + +/** + * Test Description + * ------------------------ + * - This Testcase verifies the null size checks of hipMemcpy2D API + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2D_SizeCheck") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + + // Allocating memory + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2D API where Source Pitch is zero") { + REQUIRE(hipMemcpy2D(A_h, 0, A_d, + pitch_A, NUM_W, NUM_H, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API where Destination Pitch is zero") { + REQUIRE(hipMemcpy2D(A_h, width, A_d, + 0, NUM_W, NUM_H, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API where height is zero") { + REQUIRE(hipMemcpy2D(A_h, width, A_d, + pitch_A, NUM_W, 0, + hipMemcpyDeviceToHost) == hipSuccess); + } + + SECTION("hipMemcpy2D API where width is zero") { + REQUIRE(hipMemcpy2D(A_h, width, A_d, + pitch_A, 0, NUM_H, + hipMemcpyDeviceToHost) == hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + free(A_h); +} + +/** + * Test Description + * ------------------------ + * - This Testcase verifies all the negative scenarios of hipMemcpy2D API + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2D_Negative") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + + // Allocating memory + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2D API by Passing nullptr to destination") { + REQUIRE(hipMemcpy2D(nullptr, width, A_d, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API by Passing nullptr to destination") { + REQUIRE(hipMemcpy2D(nullptr, width, nullptr, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API where width is greater than destination pitch") { + REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A, + COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost) != hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + free(A_h); +} + +static void hipMemcpy2D_Basic_Size_Test(size_t inc) { + constexpr int defaultProgramSize = 256 * 1024 * 1024; + constexpr int N = 2; + constexpr int value = 42; + int *in, *out, *dev; + size_t newSize = 0, inp = 0; + size_t size = sizeof(int) * N * inc; + + size_t free, total; + HIP_CHECK(hipMemGetInfo(&free, &total)); + + if ( free < 2 * size ) + newSize = ( free - defaultProgramSize ) / 2; + else + newSize = size; + + INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes."); + INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); + INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); + + HIP_CHECK(hipHostMalloc(&in, newSize)); + HIP_CHECK(hipHostMalloc(&out, newSize)); + HIP_CHECK(hipMalloc(&dev, newSize)); + + inp = newSize / (sizeof(int) * N); + for (size_t i=0; i < N; i++) { + in[i * inp] = value; + } + + size_t pitch = sizeof(int) * inp; + + HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int), + N, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int), + N, hipMemcpyDeviceToHost)); + + for (size_t i=0; i < N; i++) { + REQUIRE(out[i * inp] == value); + } + + HIP_CHECK(hipFree(dev)); + HIP_CHECK(hipHostFree(in)); + HIP_CHECK(hipHostFree(out)); +} + +/** + * Test Description + * ------------------------ + * - This testcase performs multidevice size check on hipMemcpy2D API + 1. Verify hipMemcpy2D with 1 << 20 size + 2. Verify hipMemcpy2D with 1 << 21 size + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") { + CHECK_IMAGE_SUPPORT + size_t input = 1 << 20; + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + + for (int i=0; i < numDevices; i++) { + HIP_CHECK(hipSetDevice(i)); + + SECTION("Verify hipMemcpy2D with 1 << 20 size") { + hipMemcpy2D_Basic_Size_Test(input); + } + SECTION("Verify hipMemcpy2D with 1 << 21 size") { + input <<= 1; + hipMemcpy2D_Basic_Size_Test(input); + } + } +} diff --git a/catch/unit/memory/memcpy2d_tests_common.hh b/catch/unit/memory/memcpy2d_tests_common.hh new file mode 100644 index 000000000..990dd9dcd --- /dev/null +++ b/catch/unit/memory/memcpy2d_tests_common.hh @@ -0,0 +1,325 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include + +template +void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows = 128; + + LinearAllocGuard2D device_alloc(cols, rows); + + const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows); + + const dim3 threads_per_block(32, 32); + const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1); + Iota<<>>(device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width_logical(), device_alloc.height(), 1); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(memcpy_func(host_alloc.ptr(), host_pitch, device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), kind)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(), + device_alloc.height(), 1, f); +} + +template +void Memcpy2DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows = 128; + + const auto device_count = HipTest::getDeviceCount(); + const auto src_device = GENERATE_COPY(range(0, device_count)); + const auto dst_device = GENERATE_COPY(range(0, device_count)); + const size_t src_cols_mult = GENERATE(1, 2); + + INFO("Src device: " << src_device << ", Dst device: " << dst_device); + + HIP_CHECK(hipSetDevice(src_device)); + if constexpr (enable_peer_access) { + if (src_device == dst_device) { + return; + } + int can_access_peer = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); + if (!can_access_peer) { + INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device); + REQUIRE(can_access_peer); + } + HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0)); + } + + LinearAllocGuard2D src_alloc(cols * src_cols_mult, rows); + HIP_CHECK(hipSetDevice(src_device)); + LinearAllocGuard2D dst_alloc(cols, rows); + HIP_CHECK(hipSetDevice(src_device)); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows); + + const dim3 threads_per_block(32, 32); + const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1); + // Using dst_alloc width and height to set only the elements that will be copied over to + // dst_alloc + Iota<<>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(), + dst_alloc.height(), 1); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), kind)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost)); + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(), + dst_alloc.height(), 1, f); +} + +template +void Memcpy2DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows = 128; + + LinearAllocGuard2D device_alloc(cols, rows); + + const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch()); + + LinearAllocGuard src_host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows); + LinearAllocGuard dst_host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows); + + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(), + device_alloc.height(), 1, f); + + std::fill_n(dst_host_alloc.ptr(), device_alloc.width_logical() * rows, 0); + + HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(), host_pitch, + device_alloc.width(), device_alloc.height(), kind)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(), + device_alloc.pitch(), device_alloc.width(), device_alloc.height(), + hipMemcpyDeviceToHost)); + + PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(), + device_alloc.height(), 1, f); +} + +template +void Memcpy2DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows = 128; + + const size_t src_pitch = GENERATE_REF(cols * sizeof(int), cols * sizeof(int) + 64); + + LinearAllocGuard src_host(LinearAllocs::hipHostMalloc, src_pitch * rows); + LinearAllocGuard dst_host(LinearAllocs::hipHostMalloc, cols * sizeof(int) * rows); + + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemorySet(src_host.ptr(), src_pitch, cols, rows, 1, f); + + HIP_CHECK(memcpy_func(dst_host.ptr(), cols * sizeof(int), src_host.ptr(), src_pitch, + cols * sizeof(int), rows, kind)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + PitchedMemoryVerify(dst_host.ptr(), cols * sizeof(int), cols, rows, 1, f); +} + +// Synchronization behavior checks +template +void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream) { + LaunchDelayKernel(std::chrono::milliseconds{300}, kernel_stream); + HIP_CHECK(memcpy_func()); + if (should_sync) { + HIP_CHECK(hipStreamQuery(kernel_stream)); + } else { + HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady); + } +} + +template +void Memcpy2DHtoDSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + using LA = LinearAllocs; + const auto host_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc); + LinearAllocGuard host_alloc(host_alloc_type, 32 * sizeof(int) * 32); + LinearAllocGuard2D device_alloc(32, 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, device_alloc.ptr(), device_alloc.pitch(), + host_alloc.ptr(), device_alloc.width(), device_alloc.width(), + device_alloc.height(), hipMemcpyHostToDevice), + should_sync, kernel_stream); +} + +template +void Memcpy2DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + LinearAllocGuard host_alloc(LinearAllocs::malloc, 32 * sizeof(int) * 32); + LinearAllocGuard2D device_alloc(32, 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(), + device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height(), hipMemcpyDeviceToHost), + should_sync, kernel_stream); +} + +template +void Memcpy2DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, 32 * sizeof(int) * 32); + LinearAllocGuard2D device_alloc(32, 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(), + device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height(), hipMemcpyDeviceToHost), + should_sync, kernel_stream); +} + +template +void Memcpy2DDtoDSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + LinearAllocGuard2D src_alloc(32, 32); + LinearAllocGuard2D dst_alloc(32, 32); + MemcpySyncBehaviorCheck( + std::bind(memcpy_func, dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice), + should_sync, kernel_stream); +} + +template +void Memcpy2DHtoHSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + using LA = LinearAllocs; + const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc); + const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc); + + LinearAllocGuard src_alloc(src_alloc_type, 32 * sizeof(int) * 32); + LinearAllocGuard dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, dst_alloc.ptr(), 32 * sizeof(int), src_alloc.ptr(), + 32 * sizeof(int), 32 * sizeof(int), 32, hipMemcpyHostToHost), + should_sync, kernel_stream); +} + +template +void Memcpy2DZeroWidthHeight(F memcpy_func, const hipStream_t stream = nullptr) { + constexpr size_t cols = 63; + constexpr size_t rows = 64; + + const auto [width_mult, height_mult] = + GENERATE(std::make_pair(0, 1), std::make_pair(1, 0), std::make_pair(0, 0)); + + SECTION("Device to Host") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows); + std::fill_n(host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 42); + HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 1, device_alloc.width(), + device_alloc.height())); + + HIP_CHECK(memcpy_func(host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(), + device_alloc.pitch(), device_alloc.width() * width_mult, + device_alloc.height() * height_mult, hipMemcpyDeviceToHost)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(stream)); + } + ArrayFindIfNot(host_alloc.ptr(), static_cast(42), + device_alloc.width_logical() * device_alloc.height()); + } + + SECTION("Device to Device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D dst_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows); + HIP_CHECK( + hipMemset2D(src_alloc.ptr(), src_alloc.pitch(), 1, src_alloc.width(), src_alloc.height())); + HIP_CHECK( + hipMemset2D(dst_alloc.ptr(), dst_alloc.pitch(), 42, dst_alloc.width(), dst_alloc.height())); + HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width() * width_mult, dst_alloc.height() * height_mult, + hipMemcpyDeviceToDevice)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(stream)); + } + HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost)); + ArrayFindIfNot(host_alloc.ptr(), static_cast(42), + dst_alloc.width_logical() * dst_alloc.height()); + } + + SECTION("Host to Device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard src_host_alloc(LinearAllocs::hipHostMalloc, + device_alloc.width() * rows); + LinearAllocGuard dst_host_alloc(LinearAllocs::hipHostMalloc, + device_alloc.width() * rows); + std::fill_n(src_host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 1); + HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 42, device_alloc.width(), + device_alloc.height())); + HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(), + device_alloc.width(), device_alloc.width() * width_mult, + device_alloc.height() * height_mult, hipMemcpyHostToDevice)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(stream)); + } + HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(), + device_alloc.pitch(), device_alloc.width(), device_alloc.height(), + hipMemcpyDeviceToHost)); + ArrayFindIfNot(dst_host_alloc.ptr(), static_cast(42), + device_alloc.width_logical() * device_alloc.height()); + } + + SECTION("Host to Host") { + const auto alloc_size = cols * rows; + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, alloc_size); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, alloc_size); + std::fill_n(src_alloc.ptr(), alloc_size, 1); + std::fill_n(dst_alloc.ptr(), alloc_size, 42); + HIP_CHECK(memcpy_func(dst_alloc.ptr(), cols, src_alloc.ptr(), cols, cols * width_mult, + rows * height_mult, hipMemcpyHostToHost)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(stream)); + } + ArrayFindIfNot(dst_alloc.ptr(), static_cast(42), alloc_size); + } +} \ No newline at end of file