From a64e44794eb8f6aa00f1b9880a4c7bff722aed57 Mon Sep 17 00:00:00 2001 From: Cordell Bloor Date: Fri, 27 Jan 2023 13:01:34 -0700 Subject: [PATCH] Cleanup device vectors --- clients/include/testing_gesvdx.hpp | 6 +- clients/include/testing_managed_malloc.hpp | 4 +- clients/rocblascommon/device_batch_vector.hpp | 304 +++++------------- clients/rocblascommon/device_memory.hpp | 27 ++ .../device_strided_batch_vector.hpp | 273 +++++----------- clients/rocblascommon/rocblas_vector.hpp | 4 +- 6 files changed, 209 insertions(+), 409 deletions(-) create mode 100644 clients/rocblascommon/device_memory.hpp diff --git a/clients/include/testing_gesvdx.hpp b/clients/include/testing_gesvdx.hpp index 4dd344b8c..920e7ab50 100644 --- a/clients/include/testing_gesvdx.hpp +++ b/clients/include/testing_gesvdx.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2022 Advanced Micro Devices, Inc. + * Copyright (c) 2022-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -166,7 +166,7 @@ void testing_gesvdx_bad_arg() // check bad arguments gesvdx_checkBadArgs(handle, left_svect, right_svect, srange, m, n, dA.data(), lda, - stA, vl, vu, il, iu, dNsv, dS.data(), stS, dU.data(), ldu, stU, + stA, vl, vu, il, iu, dNsv.data(), dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, difail.data(), stF, dinfo.data(), bc); } else @@ -177,7 +177,7 @@ void testing_gesvdx_bad_arg() // check bad arguments gesvdx_checkBadArgs(handle, left_svect, right_svect, srange, m, n, dA.data(), lda, - stA, vl, vu, il, iu, dNsv, dS.data(), stS, dU.data(), ldu, stU, + stA, vl, vu, il, iu, dNsv.data(), dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, difail.data(), stF, dinfo.data(), bc); } } diff --git a/clients/include/testing_managed_malloc.hpp b/clients/include/testing_managed_malloc.hpp index 252f73797..4ad1919ef 100644 --- a/clients/include/testing_managed_malloc.hpp +++ b/clients/include/testing_managed_malloc.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2020-2022 Advanced Micro Devices, Inc. + * Copyright (c) 2020-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -7,6 +7,8 @@ #include "clientcommon.hpp" #include "lapack_host_reference.hpp" #include "norm.hpp" +#include "rocblascommon/rocblas_init.hpp" +#include "rocblascommon/rocblas_vector.hpp" #include "rocsolver.hpp" #include "rocsolver_arguments.hpp" #include "rocsolver_test.hpp" diff --git a/clients/rocblascommon/device_batch_vector.hpp b/clients/rocblascommon/device_batch_vector.hpp index 8b6f1052e..7643baf61 100644 --- a/clients/rocblascommon/device_batch_vector.hpp +++ b/clients/rocblascommon/device_batch_vector.hpp @@ -1,298 +1,170 @@ /* ************************************************************************ - * Copyright (c) 2018-2020 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once -#include "d_vector.hpp" +#include +#include +#include +#include + +#include +#include + +#include "common_host_helpers.hpp" +#include "device_memory.hpp" -// -// Local declaration of the host strided batch vector. -// template class host_batch_vector; -//! -//! @brief pseudo-vector subclass which uses a batch of device memory pointers -//! and -//! - an array of pointers in host memory -//! - an array of pointers in device memory -//! template -class device_batch_vector : private d_vector +class device_batch_vector { public: - using value_type = T; - -public: - //! - //! @brief Disallow copying. - //! - device_batch_vector(const device_batch_vector&) = delete; - - //! - //! @brief Disallow assigning. - //! - device_batch_vector& operator=(const device_batch_vector&) = delete; - - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param batch_count The batch count. - //! - explicit device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) - : d_vector(size_t(n) * std::abs(inc)) - , m_n(n) - , m_inc(inc) - , m_batch_count(batch_count) - { - if(false == this->try_initialize_memory()) + device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) + : hPtrArr_(std::make_unique(batch_count)) + , n_(n) + , inc_(inc) + , batch_count_(batch_count) + { + assert(n > 0); + assert(batch_count > 0); + + T** dPtrArr; + THROW_IF_HIP_ERROR(hipMalloc(&dPtrArr, sizeof(T*) * batch_count)); + dPtrArr_ = std::unique_ptr(dPtrArr); + + auto tmp = std::make_unique(batch_count); + const size_t size = vsize(); + for(rocblas_int i = 0; i < batch_count; ++i) { - this->free_memory(); + T* dArr; + THROW_IF_HIP_ERROR(hipMalloc(&dArr, sizeof(T) * size)); + hPtrArr_[i].reset(dArr); + tmp[i] = dArr; } + THROW_IF_HIP_ERROR(hipMemcpy(dPtrArr, tmp.get(), sizeof(T*) * batch_count, hipMemcpyHostToDevice)); } - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param stride (UNUSED) The stride. - //! @param batch_count The batch count. - //! - explicit device_batch_vector(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count) + device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count) : device_batch_vector(n, inc, batch_count) { + assert(stride == 1); } - //! - //! @brief Constructor (kept for backward compatibility only, to be removed). - //! @param batch_count The number of vectors. - //! @param size_vector The size of each vectors. - //! - explicit device_batch_vector(rocblas_int batch_count, size_t size_vector) - : device_batch_vector(size_vector, 1, batch_count) + // The number of elements in each vector. + rocblas_int n() const noexcept { + return n_; } - //! - //! @brief Destructor. - //! - ~device_batch_vector() + // The increment between elements in each vector. + rocblas_int inc() const noexcept { - this->free_memory(); + return inc_; } - //! - //! @brief Returns the length of the vector. - //! - rocblas_int n() const + // The size of each vector. This is a derived property of the number of elements in the vector + // and the spacing between them. + size_t vsize() const { - return this->m_n; + return size_t(n_) * std::abs(inc_); } - //! - //! @brief Returns the increment of the vector. - //! - rocblas_int inc() const + // The number of vectors in the batch. + rocblas_int batch_count() const noexcept { - return this->m_inc; + return batch_count_; } - //! - //! @brief Returns the value of batch_count. - //! - rocblas_int batch_count() const + T* const* data() { - return this->m_batch_count; + return dPtrArr_.get(); } - //! - //! @brief Returns the stride value. - //! - rocblas_stride stride() const + const T* const* data() const { - return 0; + return dPtrArr_.get(); } - - //! - //! @brief Access to device data. - //! @return Pointer to the device data. - //! - T** ptr_on_device() +/* + T* const* ddata() { - return this->m_device_data; + return dPtrArr_; } - //! - //! @brief Const access to device data. - //! @return Const pointer to the device data. - //! - const T* const* ptr_on_device() const + const T* const* ddata() const { - return this->m_device_data; + return dPtrArr_; } - T* const* data() + T* const* hdata() { - return this->m_device_data; + return hPtrArr_; } - const T* const* data() const + const T* const* hdata() const { - return this->m_device_data; + return hPtrArr_; } - - //! - //! @brief Random access. - //! @param batch_index The batch index. - //! @return Pointer to the array on device. - //! +*/ T* operator[](rocblas_int batch_index) { - return this->m_data[batch_index]; + assert(batch_index >= 0); + assert(batch_index < batch_count_); + return hPtrArr_[batch_index].get(); } - //! - //! @brief Constant random access. - //! @param batch_index The batch index. - //! @return Constant pointer to the array on device. - //! const T* operator[](rocblas_int batch_index) const { - return this->m_data[batch_index]; + assert(batch_index >= 0); + assert(batch_index < batch_count_); + return hPtrArr_[batch_index].get(); } - //! - //! @brief Const cast of the data on host. - //! operator const T* const *() const { - return this->m_data; + return hPtrArr_; } // clang-format off - //! - //! @brief Cast of the data on host. - //! operator T**() { - return this->m_data; + return hPtrArr_; } // clang-format on - //! - //! @brief Tell whether ressources allocation failed. - //! explicit operator bool() const { - return nullptr != this->m_data; + return nullptr != hPtrArr_; } - //! - //! @brief Copy from a host batched vector. - //! @param that The host_batch_vector to copy. - //! hipError_t transfer_from(const host_batch_vector& that) { - hipError_t hip_err; - // - // Copy each vector. - // - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - if(hipSuccess - != (hip_err = hipMemcpy((*this)[batch_index], that[batch_index], - sizeof(T) * this->nmemb(), hipMemcpyHostToDevice))) - { - return hip_err; - } - } + assert(n_ == that.n()); + assert(inc_ == that.inc()); + assert(batch_count_ == that.batch_count()); - return hipSuccess; + hipError_t err = hipSuccess; + device_batch_vector& self = *this; + size_t num_bytes = vsize() * sizeof(T); + for(size_t b = 0; err == hipSuccess && b < batch_count_; ++b) + err = hipMemcpy(self[b], that[b], num_bytes, hipMemcpyHostToDevice); + return err; } - //! - //! @brief Check if memory exists. - //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. - //! hipError_t memcheck() const { - if(*this) - return hipSuccess; - else - return hipErrorOutOfMemory; + return hipSuccess; } private: - rocblas_int m_n{}; - rocblas_int m_inc{}; - rocblas_int m_batch_count{}; - T** m_data{}; - T** m_device_data{}; + using PtrDArrT = std::unique_ptr; - //! - //! @brief Try to allocate the ressources. - //! @return true if success false otherwise. - //! - bool try_initialize_memory() - { - bool success = false; - - success = (hipSuccess == (hipMalloc)(&this->m_device_data, this->m_batch_count * sizeof(T*))); - if(success) - { - success = (nullptr != (this->m_data = (T**)calloc(this->m_batch_count, sizeof(T*)))); - if(success) - { - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - success = (nullptr != (this->m_data[batch_index] = this->device_vector_setup())); - if(!success) - { - break; - } - } - - if(success) - { - success = (hipSuccess - == hipMemcpy(this->m_device_data, this->m_data, - sizeof(T*) * this->m_batch_count, hipMemcpyHostToDevice)); - } - } - } - return success; - } - - //! - //! @brief Free the ressources, as much as we can. - //! - void free_memory() - { - if(nullptr != this->m_data) - { - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - if(nullptr != this->m_data[batch_index]) - { - this->device_vector_teardown(this->m_data[batch_index]); - this->m_data[batch_index] = nullptr; - } - } - - free(this->m_data); - this->m_data = nullptr; - } - - if(nullptr != this->m_device_data) - { - auto tmp_device_data = this->m_device_data; - this->m_device_data = nullptr; - CHECK_HIP_ERROR((hipFree)(tmp_device_data)); - } - } +private: + std::unique_ptr hPtrArr_; + std::unique_ptr dPtrArr_; + rocblas_int n_; + rocblas_int inc_; + rocblas_int batch_count_; }; diff --git a/clients/rocblascommon/device_memory.hpp b/clients/rocblascommon/device_memory.hpp new file mode 100644 index 000000000..50cdc70ba --- /dev/null +++ b/clients/rocblascommon/device_memory.hpp @@ -0,0 +1,27 @@ +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * ************************************************************************ */ + +#pragma once + +#include + +#include + +#include "common_host_helpers.hpp" + +struct device_deleter +{ + void operator()(void* p) const + { + // Throwing an error when hipFree fails will likely result in throwing + // from a destructor, which should be avoided. However, we don't really + // have many options. Worst comes to worst, throwing will result in + // std::terminate being called, which is perhaps not such a bad thing + // in the test and bench clients where this is used. + THROW_IF_HIP_ERROR(hipFree(p)); + } +}; + +template +using unique_device_ptr = std::unique_ptr; diff --git a/clients/rocblascommon/device_strided_batch_vector.hpp b/clients/rocblascommon/device_strided_batch_vector.hpp index cc3bbb332..96dbc0b14 100644 --- a/clients/rocblascommon/device_strided_batch_vector.hpp +++ b/clients/rocblascommon/device_strided_batch_vector.hpp @@ -1,243 +1,140 @@ /* ************************************************************************ - * Copyright (c) 2018-2020 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once -// -// Local declaration of the host strided batch vector. -// +#include +#include +#include +#include +#include + +#include +#include + +#include "common_host_helpers.hpp" +#include "device_memory.hpp" + template class host_strided_batch_vector; -//! -//! @brief Implementation of a strided batched vector on device. -//! template -class device_strided_batch_vector : public d_vector +class device_strided_batch_vector { public: - using value_type = T; - -public: - //! - //! @brief The storage type to use. - //! - typedef enum class estorage - { - block, - interleave, - } storage; - - //! - //! @brief Disallow copying. - //! - device_strided_batch_vector(const device_strided_batch_vector&) = delete; - - //! - //! @brief Disallow assigning. - //! - device_strided_batch_vector& operator=(const device_strided_batch_vector&) = delete; - - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param stride The stride. - //! @param batch_count The batch count. - //! @param stg The storage format to use. - //! - explicit device_strided_batch_vector(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count, - storage stg = storage::block) - : d_vector(calculate_nmemb(n, inc, stride, batch_count, stg)) - , m_storage(stg) - , m_n(n) - , m_inc(inc) - , m_stride(stride) - , m_batch_count(batch_count) + device_strided_batch_vector(rocblas_int n, + rocblas_int inc, + rocblas_stride stride, + rocblas_int batch_count) + : n_(n) + , inc_(inc) + , stride_(stride) + , batch_count_(batch_count) { - bool valid_parameters = true; - - switch(this->m_storage) - { - case storage::block: - { - if(std::abs(this->m_stride) < this->m_n * std::abs(this->m_inc)) - { - valid_parameters = false; - } - break; - } - case storage::interleave: - { - if(std::abs(this->m_inc) < std::abs(this->m_stride) * this->m_batch_count) - { - valid_parameters = false; - } - break; - } - } - - if(valid_parameters) - { - this->m_data = this->device_vector_setup(); - } - } + assert(n > 0); + assert(stride != 0); + assert(batch_count > 0); + assert(size_t(n) * std::abs(inc) <= std::abs(stride)); - //! - //! @brief Destructor. - //! - ~device_strided_batch_vector() - { - if(nullptr != this->m_data) - { - this->device_vector_teardown(this->m_data); - this->m_data = nullptr; - } - } - - //! - //! @brief Returns the data pointer. - //! - T* data() - { - return this->m_data; + const size_t sz = size(); + assert(sz > 0); + T* data; + THROW_IF_HIP_ERROR(hipMalloc(&data, sizeof(T) * sz)); + data_ = std::unique_ptr(data); } - //! - //! @brief Returns the data pointer. - //! - const T* data() const + // The number of elements in each vector. + rocblas_int n() const noexcept { - return this->m_data; + return n_; } - //! - //! @brief Returns the length. - //! - rocblas_int n() const + // The increment between elements in each vector. + rocblas_int inc() const noexcept { - return this->m_n; + return inc_; } - //! - //! @brief Returns the increment. - //! - rocblas_int inc() const + // The number of vectors in the batch. + rocblas_int batch_count() const noexcept { - return this->m_inc; + return batch_count_; } - //! - //! @brief Returns the batch count. - //! - rocblas_int batch_count() const + // The total number elements in all vectors in the batch. + rocblas_stride size() const { - return this->m_batch_count; + return size_t(std::abs(stride_)) * batch_count_; } - //! - //! @brief Returns the stride value. - //! - rocblas_stride stride() const + // The number of elements from the start of one vector to the start of the next. + rocblas_stride stride() const noexcept { - return this->m_stride; + return stride_; } - //! - //! @brief Returns pointer. - //! @param batch_index The batch index. - //! @return A mutable pointer to the batch_index'th vector. - //! + // Returns a vector from the batch. T* operator[](rocblas_int batch_index) { - return (this->m_stride >= 0) - ? this->m_data + batch_index * this->m_stride - : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; + assert(batch_index >= 0); + assert(batch_index < batch_count_); + + rocblas_stride index + = stride_ >= 0 ? stride_ * batch_index : stride_ * (batch_index - batch_count_ + 1); + + assert(index >= 0); + assert(index < size()); + + return &data_[index]; } - //! - //! @brief Returns non-mutable pointer. - //! @param batch_index The batch index. - //! @return A non-mutable mutable pointer to the batch_index'th vector. - //! + // Returns a vector from the batch. const T* operator[](rocblas_int batch_index) const { - return (this->m_stride >= 0) - ? this->m_data + batch_index * this->m_stride - : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; - } + assert(batch_index >= 0); + assert(batch_index < batch_count_); - //! - //! @brief Cast operator. - //! @remark Returns the pointer of the first vector. - //! - operator T*() - { - return (*this)[0]; + rocblas_stride index + = stride_ >= 0 ? stride_ * batch_index : stride_ * (batch_index - batch_count_ + 1); + + assert(index >= 0); + assert(index < size()); + + return &data_[index]; } - //! - //! @brief Non-mutable cast operator. - //! @remark Returns the non-mutable pointer of the first vector. - //! - operator const T*() const + // Returns a pointer to the underlying array. + T* data() noexcept { - return (*this)[0]; + return data_.get(); } - //! - //! @brief Tell whether ressources allocation failed. - //! - explicit operator bool() const + // Returns a pointer to the underlying array. + const T* data() const noexcept { - return nullptr != this->m_data; + return data_.get(); } - //! - //! @brief Transfer data from a strided batched vector on device. - //! @param that That strided batched vector on device. - //! @return The hip error. - //! hipError_t transfer_from(const host_strided_batch_vector& that) { - return hipMemcpy(this->data(), that.data(), sizeof(T) * this->nmemb(), hipMemcpyHostToDevice); + assert(n_ == that.n()); + assert(inc_ == that.inc()); + assert(stride_ == that.stride()); + assert(batch_count_ == that.batch_count()); + + return hipMemcpy(this->data(), that.data(), sizeof(T) * size(), hipMemcpyHostToDevice); } - //! - //! @brief Check if memory exists. - //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. - //! hipError_t memcheck() const { - if(*this) - return hipSuccess; - else - return hipErrorOutOfMemory; + return hipSuccess; } private: - storage m_storage{storage::block}; - rocblas_int m_n{}; - rocblas_int m_inc{}; - rocblas_stride m_stride{}; - rocblas_int m_batch_count{}; - T* m_data{}; - - static size_t calculate_nmemb(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count, - storage st) - { - switch(st) - { - case storage::block: return size_t(std::abs(stride)) * batch_count; - case storage::interleave: return size_t(n) * std::abs(inc); - } - return 0; - } + std::unique_ptr data_; + rocblas_int n_; + rocblas_int inc_; + rocblas_stride stride_; + rocblas_int batch_count_; }; diff --git a/clients/rocblascommon/rocblas_vector.hpp b/clients/rocblascommon/rocblas_vector.hpp index f684c5db2..e7ed79ce7 100644 --- a/clients/rocblascommon/rocblas_vector.hpp +++ b/clients/rocblascommon/rocblas_vector.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -10,6 +10,8 @@ #include "host_batch_vector.hpp" #include "host_strided_batch_vector.hpp" +#include "rocblas_random.hpp" + //! //! @brief Random number with type deductions. //!