deal with conflict
YuanRisheng committed Jan 25, 2022
2 parents 33bb053 + c1e5a39 commit feedeed
Showing 36 changed files with 1,004 additions and 475 deletions.
7 changes: 7 additions & 0 deletions paddle/fluid/framework/pten_utils.h
@@ -86,5 +86,12 @@ struct ConvertToPtenContext<platform::CPUDeviceContext> {
using TYPE = pten::CPUContext;
};

#ifdef PADDLE_WITH_XPU
template <>
struct ConvertToPtenContext<platform::XPUDeviceContext> {
using TYPE = pten::XPUContext;
};
#endif

} // namespace framework
} // namespace paddle
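
With this specialization in place, framework code can resolve the pten context type for any fluid device context, XPU included. A minimal illustrative sketch of that pattern (the helper ToPtenContext is not part of this commit; it assumes each fluid context derives from its pten counterpart, which the reshape_op.cc changes below rely on):

    // Sketch only (not part of this commit): map a fluid device context to
    // its pten context type via ConvertToPtenContext and downcast to it.
    // Valid because each fluid context derives from its pten counterpart
    // (e.g. platform::XPUDeviceContext -> pten::XPUContext).
    template <typename DeviceCtx>
    const typename paddle::framework::ConvertToPtenContext<DeviceCtx>::TYPE&
    ToPtenContext(const DeviceCtx& ctx) {
      using PtenCtx =
          typename paddle::framework::ConvertToPtenContext<DeviceCtx>::TYPE;
      return static_cast<const PtenCtx&>(ctx);
    }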
7 changes: 7 additions & 0 deletions paddle/fluid/memory/CMakeLists.txt
@@ -34,6 +34,13 @@ if (WITH_ROCM)
DEPS device_context malloc)
endif()

if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(get_base_ptr_test PROPERTIES
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()

#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()
7 changes: 0 additions & 7 deletions paddle/fluid/memory/allocation/CMakeLists.txt
@@ -125,10 +125,3 @@ if(NOT WIN32)
cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator)
cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator)
endif(NOT WIN32)

if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(base_ptr_test PROPERTIES
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
9 changes: 1 addition & 8 deletions paddle/fluid/memory/allocation/allocator.h
@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation {
const platform::Place& place)
: pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}

void* base_ptr() const {
PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
paddle::platform::errors::Unimplemented(
"base_ptr() is only implemented for auto_growth "
"strategy, not support %s strategy",
FLAGS_allocator_strategy));
return base_ptr_;
}
void* base_ptr() const { return base_ptr_; }

private:
inline void RegisterDecoratedAllocator(Allocator* allocator) {
19 changes: 19 additions & 0 deletions paddle/fluid/memory/allocation/allocator_facade.cc
@@ -282,6 +282,10 @@ class AllocatorFacadePrivate {
return iter->second;
}

void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
return static_cast<Allocation*>(allocation.get())->base_ptr();
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool HasCUDAAllocator(const platform::CUDAPlace& place,
const gpuStream_t& stream) {
@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
}

void* AllocatorFacade::GetBasePtr(
const std::shared_ptr<pten::Allocation>& allocation) {
PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth,
paddle::platform::errors::Unimplemented(
"GetBasePtr() is only implemented for auto_growth "
"strategy, not support allocator strategy: %d",
static_cast<int>(GetAllocatorStrategy())));
PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true,
paddle::platform::errors::Unimplemented(
"GetBasePtr() is only implemented for CUDAPlace(), not "
"suppot place: %s",
allocation->place()));
return m_->GetBasePtr(allocation);
}

std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size) {
return std::shared_ptr<pten::Allocation>(Alloc(place, size));
2 changes: 2 additions & 0 deletions paddle/fluid/memory/allocation/allocator_facade.h
@@ -51,6 +51,8 @@ class AllocatorFacade {

const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place);

void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);

// Allocate a shared allocation.
std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
size_t size);
paddle/fluid/memory/allocation/base_ptr_test.cu → paddle/fluid/memory/get_base_ptr_test.cu
@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
void OneByOneAllocTest() {
for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);

void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
@@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}

void BatchByBatchAllocTest() {
std::vector<AllocationPtr> allocations;
std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(batch_size_);
size_t batch_num = alloc_times_ / batch_size_;

for (size_t i = 0; i < batch_num; ++i) {
for (size_t j = 0; j < batch_size_; ++j) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);

void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);

allocations.emplace_back(std::move(allocation));
allocations.emplace_back(allocation);
}
allocations.clear();
}
@@ -70,28 +70,28 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}

void ContinuousAllocTest() {
std::vector<AllocationPtr> allocations;
std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(alloc_times_);

for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);

void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);

allocations.emplace_back(std::move(allocation));
allocations.emplace_back(allocation);
}

allocations.clear();
Release(place_);
}

void ZeroSizeAllocTest() {
AllocationPtr allocation = Alloc(place_, 0);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
auto allocation = AllocShared(place_, 0);
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
4 changes: 4 additions & 0 deletions paddle/fluid/memory/malloc.cc
@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<Allocation>& allocation,
stream);
}

void* GetBasePtr(const std::shared_ptr<Allocation>& allocation) {
return allocation::AllocatorFacade::Instance().GetBasePtr(allocation);
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream) {
2 changes: 2 additions & 0 deletions paddle/fluid/memory/malloc.h
@@ -44,6 +44,8 @@ extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
extern bool InSameStream(const std::shared_ptr<Allocation>& allocation,
const platform::Stream& stream);

extern void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream);
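
malloc.h and malloc.cc together expose GetBasePtr() as the public entry point that forwards to AllocatorFacade. A minimal usage sketch, assuming a CUDA build with FLAGS_allocator_strategy=auto_growth (the facade enforces both preconditions above); the size and function name here are arbitrary:

    // Sketch only: exercise the new public API added in this commit.
    #include "paddle/fluid/memory/malloc.h"
    #include "paddle/fluid/platform/place.h"

    void BasePtrDemo() {
      paddle::platform::CUDAPlace place(0);
      auto allocation = paddle::memory::AllocShared(place, 1024);
      // Start of the auto_growth chunk that allocation->ptr() was carved
      // from; allocation->ptr() points somewhere inside this chunk.
      void* base = paddle::memory::GetBasePtr(allocation);
    }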
4 changes: 2 additions & 2 deletions paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
@@ -94,11 +94,11 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
inverse_scale = 0.0;
}

paddle::platform::XPUVersion version = dev_ctx.xpu_version();
auto version = dev_ctx.xpu_version();
framework::Tensor float_x;
framework::Tensor float_out;
if (std::is_same<T, paddle::platform::float16>::value &&
(version == paddle::platform::XPUVersion::XPU1)) {
(version == pten::backends::xpu::XPUVersion::XPU1)) {
float_x.mutable_data<MPDType>(dev_ctx.GetPlace(),
x->numel() * sizeof(MPDType));
float_out.mutable_data<MPDType>(dev_ctx.GetPlace(),
4 changes: 2 additions & 2 deletions paddle/fluid/operators/dropout_op_xpu.cc
@@ -107,8 +107,8 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
return;
}

paddle::platform::XPUVersion version = dev_ctx.xpu_version();
if (version == paddle::platform::XPUVersion::XPU1) {
auto version = dev_ctx.xpu_version();
if (version == pten::backends::xpu::XPUVersion::XPU1) {
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUType* mask_new = RAII_GUARD.alloc_l3_or_gm<XPUType>(mask->numel());
float scale =
9 changes: 6 additions & 3 deletions paddle/fluid/operators/reshape_op.cc
@@ -395,7 +395,8 @@ class ReshapeKernel {
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);
pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx), *in,
pt_scalar_shape, out);
}
#endif
}
@@ -422,7 +423,8 @@ class ReshapeGradKernel {
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
*d_out, d_x);
}
#endif
}
@@ -449,7 +451,8 @@ class ReshapeDoubleGradKernel {
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeDoubleGradKernel(dev_ctx, *dd_x, dd_out);
pten::ReshapeDoubleGradKernel(
static_cast<const pten::XPUContext &>(dev_ctx), *dd_x, dd_out);
}
#endif
}
4 changes: 2 additions & 2 deletions paddle/fluid/operators/softmax_op_xpu.cc
@@ -45,8 +45,8 @@ class SoftmaxXPUKernel : public framework::OpKernel<T> {
auto& dev_ctx = context.template device_context<DeviceContext>();

int r = XPU_SUCCESS;
paddle::platform::XPUVersion version = dev_ctx.xpu_version();
if (version == paddle::platform::XPUVersion::XPU1) {
auto version = dev_ctx.xpu_version();
if (version == pten::backends::xpu::XPUVersion::XPU1) {
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUType* clip_x_data_l3 = RAII_GUARD.alloc_l3_or_gm<XPUType>(x->numel());
r = xpu::clip_v2(dev_ctx.x_context(),
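
The check_finite_and_unscale, dropout, and softmax kernels above all share one dispatch on the XPU generation, whose enum now comes from pten::backends::xpu rather than paddle::platform. A sketch of the recurring shape (kernel calls elided; dev_ctx is assumed to be a platform::XPUDeviceContext):

    // Sketch only: the XPU1-vs-newer dispatch pattern used by these kernels.
    auto version = dev_ctx.xpu_version();
    if (version == pten::backends::xpu::XPUVersion::XPU1) {
      // XPU1 compatibility path: stage data through L3/global-memory
      // scratch buffers owned by the RAII guard before the kernel call.
      xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
      // ... allocate scratch via RAII_GUARD and run the fallback path ...
    } else {
      // Newer generations run the kernel directly on the inputs.
    }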
3 changes: 3 additions & 0 deletions paddle/fluid/platform/CMakeLists.txt
@@ -121,6 +121,9 @@ cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost)
cc_library(device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc xxhash ${STREAM_CALLBACK_DEPS}
place pten_place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS} ${MLU_CTX_DEPS} cpu_context)
if(WITH_XPU)
target_link_libraries(device_context xpu_context)
endif()

cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce)
if(WITH_ASCEND_CL)
2 changes: 1 addition & 1 deletion paddle/fluid/platform/device/xpu/CMakeLists.txt
@@ -4,7 +4,7 @@ endif()

set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl)

cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place)
cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place pten_xpu_info)
cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context)

add_subdirectory(tests)