Skip to content

Commit

Permalink
EXSWHTEC-103 - Implement tests for hipDrvMemcpy3D APIs (#56)
Browse files Browse the repository at this point in the history
- Implement basic behavior checks in all copy directions
- Implement synchronization behavior checks for expected behavior based on cuda docs
- Implement positive tests for zero sized width and/or height copies, where no copy is expected to happen
- Implement negative parameter tests
- Implement all of the above for hipDrvMemcpy3D and hipDrvMemcpy3DAsync.
- Disable failing tests on AMD.
- Fix copyright disclaimer.
- Add defect issue numbers.
  • Loading branch information
mirza-halilcevic authored Jun 28, 2023
1 parent c2d51ba commit c695f1b
Show file tree
Hide file tree
Showing 8 changed files with 1,753 additions and 1,049 deletions.
3 changes: 3 additions & 0 deletions catch/hipTestMain/config/config_amd_linux_common.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ClonedGrph",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ChldNode",
"Unit_hipMemGetAddressRange_Negative",
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
"Unit_hipDrvMemcpy3D_Positive_Array",
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Basic",
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Partial_Range",
"Unit_hipMemRangeGetAttributes_Negative_Parameters",
Expand Down
3 changes: 3 additions & 0 deletions catch/hipTestMain/config/config_amd_windows_common.json
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ClonedGrph",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ChldNode",
"Unit_hipMemGetAddressRange_Negative",
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
"Unit_hipDrvMemcpy3D_Positive_Array",
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
"Unit_hipMemGetAddressRange_Positive",
"Note: devicelib hangs and failures",
"Unit_deviceAllocation_Malloc_PerThread_PrimitiveDataType",
Expand Down
215 changes: 215 additions & 0 deletions catch/include/memcpy3d_tests_common.hh
Original file line number Diff line number Diff line change
Expand Up @@ -582,4 +582,219 @@ void Memcpy3DZeroWidthHeightDepth(F memcpy_func, const hipStream_t stream = null
}
ArrayFindIfNot(dst_alloc.ptr(), static_cast<uint8_t>(42), alloc_size);
}
}

constexpr auto MemTypeHost() {
#if HT_AMD
return hipMemoryTypeHost;
#else
return CU_MEMORYTYPE_HOST;
#endif
}

constexpr auto MemTypeDevice() {
#if HT_AMD
return hipMemoryTypeDevice;
#else
return CU_MEMORYTYPE_DEVICE;
#endif
}

constexpr auto MemTypeArray() {
#if HT_AMD
return hipMemoryTypeArray;
#else
return CU_MEMORYTYPE_ARRAY;
#endif
}

constexpr auto MemTypeUnified() {
#if HT_AMD
return hipMemoryTypeUnified;
#else
return CU_MEMORYTYPE_UNIFIED;
#endif
}

using DrvPtrVariant = std::variant<hipPitchedPtr, hiparray>;

template <bool async = false>
hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
hipStream_t stream = nullptr) {
HIP_MEMCPY3D parms = {0};

if (std::holds_alternative<hiparray>(dst_ptr)) {
parms.dstMemoryType = MemTypeArray();
parms.dstArray = std::get<hiparray>(dst_ptr);
} else {
auto ptr = std::get<hipPitchedPtr>(dst_ptr);
parms.dstPitch = ptr.pitch;
switch (kind) {
case hipMemcpyDeviceToHost:
case hipMemcpyHostToHost:
parms.dstMemoryType = MemTypeHost();
parms.dstHost = ptr.ptr;
break;
case hipMemcpyDeviceToDevice:
case hipMemcpyHostToDevice:
parms.dstMemoryType = MemTypeDevice();
parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(ptr.ptr);
break;
case hipMemcpyDefault:
parms.dstMemoryType = MemTypeUnified();
parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(ptr.ptr);
break;
default:
assert(false);
}
}

if (std::holds_alternative<hiparray>(src_ptr)) {
parms.srcMemoryType = MemTypeArray();
parms.srcArray = std::get<hiparray>(src_ptr);
} else {
auto ptr = std::get<hipPitchedPtr>(src_ptr);
parms.srcPitch = ptr.pitch;
switch (kind) {
case hipMemcpyDeviceToHost:
case hipMemcpyDeviceToDevice:
parms.srcMemoryType = MemTypeDevice();
parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(ptr.ptr);
break;
case hipMemcpyHostToDevice:
case hipMemcpyHostToHost:
parms.srcMemoryType = MemTypeHost();
parms.srcHost = ptr.ptr;
break;
case hipMemcpyDefault:
parms.srcMemoryType = MemTypeUnified();
parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(ptr.ptr);
break;
default:
assert(false);
}
}

parms.WidthInBytes = extent.width;
parms.Height = extent.height;
parms.Depth = extent.depth;
parms.srcXInBytes = src_pos.x;
parms.srcY = src_pos.y;
parms.srcZ = src_pos.z;
parms.dstXInBytes = dst_pos.x;
parms.dstY = dst_pos.y;
parms.dstZ = dst_pos.z;

if constexpr (async) {
return hipDrvMemcpy3DAsync(&parms, stream);
} else {
return hipDrvMemcpy3D(&parms);
}
}

template <bool should_synchronize, typename F>
void DrvMemcpy3DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
constexpr hipExtent extent{127 * sizeof(int), 128, 8};

LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);

DrvArrayAllocGuard<int> src_array(extent);
DrvArrayAllocGuard<int> dst_array(extent);

const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemorySet(src_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);

// Host -> Array
HIP_CHECK(
memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_host.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}

// Array -> Array
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}

// Array -> Host
HIP_CHECK(
memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0), dst_array.ptr(), make_hipPos(0, 0, 0), extent,
hipMemcpyDeviceToHost, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}

PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);
}

template <bool should_synchronize, typename F>
void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
constexpr hipExtent extent{127 * sizeof(int), 128, 8};

LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);

DrvArrayAllocGuard<int> src_array(extent);
DrvArrayAllocGuard<int> dst_array(extent);

LinearAllocGuard3D<int> src_device(extent);
LinearAllocGuard3D<int> dst_device(extent);

const dim3 threads_per_block(32, 32);
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
src_device.height() / threads_per_block.y + 1, src_device.depth());
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
src_device.width_logical(), src_device.height(),
src_device.depth());
HIP_CHECK(hipGetLastError());

// Device -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}

// Array -> Array
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}

// Array -> Device
HIP_CHECK(memcpy_func(dst_device.pitched_ptr(), make_hipPos(0, 0, 0), dst_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}

HIP_CHECK(
memcpy_func(make_hipPitchedPtr(host_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0), dst_device.pitched_ptr(), make_hipPos(0, 0, 0),
dst_device.extent(), hipMemcpyDeviceToHost, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}

const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);
}
2 changes: 2 additions & 0 deletions catch/unit/memory/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ set(TEST_SRC
hipArrayCreate.cc
hipArray3DCreate.cc
hipDrvMemcpy3D.cc
hipDrvMemcpy3D_old.cc
hipDrvMemcpy3DAsync.cc
hipDrvMemcpy3DAsync_old.cc
hipPointerGetAttribute.cc
hipDrvPtrGetAttributes.cc
hipMemPrefetchAsync.cc
Expand Down
Loading

0 comments on commit c695f1b

Please sign in to comment.