[Unify Tensors PR #2] Replaced pten::LoD with paddle::framework::LoD #38275

Status: merged (6 commits) on Dec 26, 2021
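In short: this PR makes `paddle::framework::Vector<T>` (the implicitly syncing CPU/GPU "mixed" vector) the single implementation on all build configurations, moves its CUDA copy routines out of the header into a new mixed_vector.cc, deletes the CPU-only `CPUVector<T>` fallback, and rebases `pten::LoD` on it. The central type change, as a minimal before/after sketch (both aliases appear verbatim in the diff below):

```cpp
// Before this PR: pten::LoD was a purely host-side container.
// using LoD = std::vector<std::vector<size_t>>;

// After this PR: each LoD level is a mixed CPU/GPU vector whose data
// can be synced to the device implicitly when a kernel asks for it.
using LoD = std::vector<paddle::framework::Vector<size_t>>;
```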
9 changes: 5 additions & 4 deletions paddle/fluid/framework/CMakeLists.txt
@@ -91,15 +91,16 @@ endif()
cc_test(copy_same_tensor_test SRCS copy_same_tensor_test.cc DEPS tensor)

cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
+cc_library(mixed_vector SRCS mixed_vector.cc DEPS device_context)

if(WITH_GPU)
-  nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS place memory device_context tensor)
+  nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor)
elseif(WITH_ROCM)
-  hip_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS place memory device_context tensor)
+  hip_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor)
else()
-  cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
+  cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS mixed_vector place memory device_context tensor)
endif()
-cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
+cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim mixed_vector place tensor framework_proto version)

cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)

87 changes: 87 additions & 0 deletions paddle/fluid/framework/mixed_vector.cc
@@ -0,0 +1,87 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/mixed_vector.h"

#include <algorithm>
#include <initializer_list>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include <vector>

#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"

namespace paddle {
namespace framework {

template <typename T>
void CopyToCPUHelper(std::vector<T> *cpu_, paddle::memory::AllocationPtr *gpu_,
                     size_t *gpu_memory_size_) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  // Copy GPU data to CPU.
  auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
      platform::DeviceContextPool::Instance().Get((*gpu_)->place()));
  auto stream = dev_ctx->stream();
  void *src = (*gpu_)->ptr();
  void *dst = cpu_->data();
  paddle::memory::Copy(platform::CPUPlace(), dst,
                       OptionalCUDAPlace(*gpu_).get(), src, *gpu_memory_size_,
                       stream);
  dev_ctx->Wait();
#endif
}

template <typename T>
void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
                             paddle::memory::AllocationPtr *gpu_,
                             size_t *gpu_memory_size_,
                             const platform::Place &place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  void *src = cpu_->data();
  *gpu_memory_size_ = cpu_->size() * sizeof(T);  // buffer size in bytes
  (*gpu_) = memory::Alloc(place, *gpu_memory_size_);
  void *dst = (*gpu_)->ptr();
  auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
      platform::DeviceContextPool::Instance().Get(place));
  auto stream = dev_ctx->stream();
  paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(), dst,
                       platform::CPUPlace(), src, *gpu_memory_size_, stream);
#endif
}

#define INSTANTIATE_VECTOR_FOR_TYPE(__TYPE__)                                  \
  template <>                                                                  \
  void Vector<__TYPE__>::VectorData::CopyToCPU() const {                       \
    CopyToCPUHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_);                \
  }                                                                            \
                                                                               \
  template <>                                                                  \
  void Vector<__TYPE__>::VectorData::CopyCPUDataToCUDA(                        \
      const platform::Place &place) const {                                    \
    CopyCPUDataToCUDAHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_, place); \
  }

INSTANTIATE_VECTOR_FOR_TYPE(size_t)
INSTANTIATE_VECTOR_FOR_TYPE(int)
INSTANTIATE_VECTOR_FOR_TYPE(int64_t)

};  // namespace framework
} // namespace paddle
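A note on the pattern used in this new file: defining `CopyToCPU`/`CopyCPUDataToCUDA` out of line and explicitly specializing them for the element types the framework actually stores (size_t, int, int64_t) is what lets the CUDA-specific code leave the header. Expanded by hand for size_t (purely illustrative), the macro produces exactly:

```cpp
template <>
void Vector<size_t>::VectorData::CopyToCPU() const {
  CopyToCPUHelper<size_t>(&cpu_, &gpu_, &gpu_memory_size_);
}

template <>
void Vector<size_t>::VectorData::CopyCPUDataToCUDA(
    const platform::Place &place) const {
  CopyCPUDataToCUDAHelper<size_t>(&cpu_, &gpu_, &gpu_memory_size_, place);
}
```

The usual trade-off applies: any other instantiation of `Vector<T>` that touches these members would fail to link, so new element types must be added to the instantiation list here.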
121 changes: 13 additions & 108 deletions paddle/fluid/framework/mixed_vector.h
@@ -23,17 +23,21 @@ limitations under the License. */

#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"

namespace paddle {
namespace framework {

-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
+    const paddle::memory::allocation::AllocationPtr &gpu_) {
+  return gpu_ == nullptr
+             ? paddle::none
+             : paddle::optional<platform::CUDAPlace>(
+                   BOOST_GET_CONST(platform::CUDAPlace, gpu_->place()));
+}

// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template <typename T>
@@ -198,10 +202,7 @@ class Vector {
std::mutex &Mutex() const { return mtx_; }

paddle::optional<platform::CUDAPlace> CUDAPlace() const {
-    return gpu_ == nullptr
-               ? paddle::none
-               : paddle::optional<platform::CUDAPlace>(
-                     BOOST_GET_CONST(platform::CUDAPlace, gpu_->place()));
+    return OptionalCUDAPlace(gpu_);
}

private:
@@ -212,17 +213,7 @@
kDirty = 0x10
};

-  void CopyToCPU() const {
-    // Copy GPU data to CPU.
-    auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
-        platform::DeviceContextPool::Instance().Get(gpu_->place()));
-    auto stream = dev_ctx->stream();
-    void *src = gpu_->ptr();
-    void *dst = cpu_.data();
-    paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
-                         gpu_memory_size_, stream);
-    dev_ctx->Wait();
-  }
+  void CopyToCPU() const;

void MutableCPU() {
if (IsInCUDA() && IsDirty()) {
@@ -260,17 +251,7 @@
}
}

-  void CopyCPUDataToCUDA(const platform::Place &place) const {
-    void *src = cpu_.data();
-    gpu_memory_size_ = cpu_.size() * sizeof(T);
-    gpu_ = memory::Alloc(place, gpu_memory_size_);
-    void *dst = gpu_->ptr();
-    auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
-        platform::DeviceContextPool::Instance().Get(place));
-    auto stream = dev_ctx->stream();
-    paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
-                         gpu_memory_size_, stream);
-  }
+  void CopyCPUDataToCUDA(const platform::Place &place) const;

void ImmutableCPU() const {
if (IsDirty() && !IsInCPU()) { // If data has been changed in CUDA, or
@@ -291,7 +272,7 @@
bool IsInCPU() const { return flag_ & kDataInCPU; }

mutable std::vector<T> cpu_;
-  mutable paddle::memory::AllocationPtr gpu_;
+  mutable paddle::memory::allocation::AllocationPtr gpu_;
mutable size_t gpu_memory_size_{0};
mutable int flag_;

@@ -465,81 +446,5 @@
mutable details::COWPtr<VectorData> m_;
};

-#else  // PADDLE_WITH_CUDA
-
-template <typename T>
-class CPUVector : public std::vector<T, std::allocator<T>> {
- public:
-  CPUVector() : std::vector<T>() {}
-  CPUVector(size_t count, const T &value = T())  // NOLINT
-      : std::vector<T>(count, value) {}
-  CPUVector(std::initializer_list<T> init) : std::vector<T>(init) {}
-  CPUVector(const std::vector<T> &other) : std::vector<T>(other) {}  // NOLINT
-  CPUVector(const CPUVector<T> &other) : std::vector<T>(other) {}
-  CPUVector(CPUVector<T> &&other) : std::vector<T>(std::move(other)) {}
-  CPUVector(std::vector<T> &&other)  // NOLINT
-      : std::vector<T>(std::move(other)) {}
-  CPUVector &operator=(const CPUVector &other) {
-    this->assign(other.begin(), other.end());
-    return *this;
-  }
-  CPUVector &operator=(const std::vector<T> &other) {
-    this->assign(other.begin(), other.end());
-    return *this;
-  }
-
-  friend std::ostream &operator<<(std::ostream &os, const CPUVector<T> &other) {
-    std::stringstream ss;
-    for (auto v : other) {
-      os << v << " ";
-    }
-    return os;
-  }
-
-  T &operator[](size_t id) { return this->at(id); }
-
-  const T &operator[](size_t id) const { return this->at(id); }
-
-  template <typename D>
-  void Extend(const D &begin, const D &end) {
-    this->reserve(this->size() + size_t(end - begin));
-    this->insert(this->end(), begin, end);
-  }
-
-  const T *CUDAData(platform::Place place) const {
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Vector::CUDAData() method is not supported in CPU-only version."));
-  }
-
-  T *CUDAMutableData(platform::Place place) {
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Vector::CUDAMutableData() method is not supported in CPU-only "
-        "version."));
-  }
-
-  const T *Data(platform::Place place) const {
-    PADDLE_ENFORCE_EQ(
-        platform::is_cpu_place(place), true,
-        platform::errors::Unavailable(
-            "Vector::Data() method is not supported when not in CPUPlace."));
-    return this->data();
-  }
-
-  T *MutableData(platform::Place place) {
-    PADDLE_ENFORCE_EQ(
-        platform::is_cpu_place(place), true,
-        platform::errors::Unavailable("Vector::MutableData() method is not "
-                                      "supported when not in CPUPlace."));
-    return this->data();
-  }
-
-  const void *Handle() const { return static_cast<const void *>(this); }
-};
-
-template <typename T>
-using Vector = CPUVector<T>;
-
-#endif  // PADDLE_WITH_CUDA

}; // namespace framework
} // namespace paddle
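Net effect of the header change: `CPUVector<T>` is gone and `Vector<T>` is the one implementation on every build; only the copy helpers behind it are compiled per-device. A rough usage sketch — the std::vector-like surface is stated in the header comment, and `CUDAData(place)` is taken from the deleted CPUVector stubs above, so treat the exact signatures as assumptions:

```cpp
paddle::framework::Vector<size_t> offsets = {0, 2, 5};
offsets.push_back(9);           // ordinary host-side mutation
size_t first = offsets[0];      // host reads, as with std::vector
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// On a GPU build the same object can hand out device memory; the
// host-to-device copy happens implicitly inside the accessor.
const size_t *dev_ptr = offsets.CUDAData(paddle::platform::CUDAPlace(0));
#endif
```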
1 change: 1 addition & 0 deletions paddle/fluid/framework/mixed_vector_test.cu
@@ -25,6 +25,7 @@
#include "gtest/gtest.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h"

template <typename T>
using vec = paddle::framework::Vector<T>;
6 changes: 1 addition & 5 deletions paddle/fluid/operators/filter_by_instag_op.h
@@ -31,13 +31,9 @@ namespace operators {
using Tensor = framework::Tensor;
using SelectedRows = framework::SelectedRows;
using LoDTensor = framework::LoDTensor;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+
template <typename T>
using Vector = framework::Vector<T>;
-#else
-template <typename T>
-using Vector = framework::CPUVector<T>;
-#endif

template <typename T>
class FilterByInstagKernel : public framework::OpKernel<T> {
6 changes: 1 addition & 5 deletions paddle/fluid/operators/shuffle_batch_op.h
@@ -33,13 +33,9 @@ namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+
template <typename T>
using Vector = framework::Vector<T>;
-#else
-template <typename T>
-using Vector = framework::CPUVector<T>;
-#endif

template <typename T>
class ShuffleBatchKernel : public framework::OpKernel<T> {
4 changes: 2 additions & 2 deletions paddle/pten/core/CMakeLists.txt
@@ -8,7 +8,7 @@ endif()

cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)

cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
-cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce)
+cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)

cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
4 changes: 1 addition & 3 deletions paddle/pten/core/dense_tensor.h
@@ -83,9 +83,7 @@ class DenseTensor : public TensorBase,

/// \brief Returns the lod of the tensor.
/// \return The lod of the tensor.
-  const std::vector<std::vector<size_t>>& lod() const noexcept {
-    return meta_.lod;
-  }
+  const LoD& lod() const noexcept { return meta_.lod; }

/// \brief Returns the data type of the tensor.
/// \return The data type of the tensor.
2 changes: 1 addition & 1 deletion paddle/pten/core/tensor_meta.cc
@@ -27,7 +27,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype,
DenseTensorMeta::DenseTensorMeta(DataType dtype,
                                 const DDim& dims,
                                 DataLayout layout,
-                                const std::vector<std::vector<size_t>>& lod)
+                                const LoD& lod)
    : dims(dims), dtype(dtype), layout(layout), lod(lod) {}

bool DenseTensorMeta::valid() const noexcept {
9 changes: 5 additions & 4 deletions paddle/pten/core/tensor_meta.h
@@ -22,15 +22,16 @@ limitations under the License. */

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/ddim.h"

// Note: mixed_vector.h pulls in many headers now. Will LoD be
// used on a CUDA device? Can we use small_vector here?
// #include "paddle/fluid/framework/mixed_vector.h"
// @zhanlve: Rollback to original LoD for now
#include "paddle/fluid/framework/mixed_vector.h"

namespace pten {

using DDim = paddle::framework::DDim;
-using LoD = std::vector<std::vector<size_t>>;
+using LoD = std::vector<paddle::framework::Vector<size_t>>;

/// \brief The meta data of dense tensor. Take the structure type
/// and use all default operations.
///
@@ -44,7 +45,7 @@ struct DenseTensorMeta {
  DenseTensorMeta(DataType dtype,
                  const DDim& dims,
                  DataLayout layout,
-                 const std::vector<std::vector<size_t>>& lod);
+                 const LoD& lod);

/// \brief Test whether the metadata is valid. Does not throw exceptions.
/// \return Whether the metadata is valid.
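Because `framework::Vector<size_t>` is constructible from an initializer list, brace-initialized call sites keep compiling with the new `pten::LoD`; the updated test below does exactly this. A minimal construction sketch, mirroring the values used in that test:

```cpp
pten::LoD lod{{0, 2}};  // one level with offsets {0, 2}
pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                           paddle::framework::make_ddim({2, 1}),
                           pten::DataLayout::NCHW,
                           lod);
```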
5 changes: 3 additions & 2 deletions paddle/pten/tests/api/test_tensor_utils.cc
@@ -15,6 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"

#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/core/tensor_meta.h"

namespace paddle {
namespace tests {
@@ -30,7 +31,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) {
const DDim dims({2, 1});
const DataType dtype{DataType::FLOAT32};
const DataLayout layout{DataLayout::NCHW};
-  const std::vector<std::vector<size_t>> lod{{0, 2}};
+  const pten::LoD lod{{0, 2}};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc =
@@ -46,7 +47,7 @@

CHECK(dense_tensor.lod().size() == lod_tensor.lod().size());
  CHECK(dense_tensor.lod()[0] ==
-        static_cast<std::vector<size_t>>((lod_tensor.lod()[0])));
+        static_cast<paddle::framework::Vector<size_t>>((lod_tensor.lod()[0])));
CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type()));
CHECK(dense_tensor.layout() ==
pten::TransToPtenDataLayout(lod_tensor.layout()));