Commit 6116f9a
* Revert "Revert "Add copy from tensor (#34406)" (#35173)"
  This reverts commit 32c1ec4.
* add template instantiation
1 parent 00c9aeb

Showing 11 changed files with 725 additions and 11 deletions.
@@ -0,0 +1,190 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"

namespace paddle_infer {
namespace contrib {

using paddle::PaddleDType;

void* TensorUtils::CudaMallocPinnedMemory(size_t size) {
#if defined(PADDLE_WITH_CUDA)
  void* ptr = nullptr;
  PADDLE_ENFORCE_CUDA_SUCCESS(cudaMallocHost(&ptr, size));
  return ptr;
#else
  return nullptr;
#endif
}

void TensorUtils::CudaFreePinnedMemory(void* ptr) {
#if defined(PADDLE_WITH_CUDA)
  PADDLE_ENFORCE_CUDA_SUCCESS(cudaFreeHost(ptr));
#endif
}

void TensorUtils::CopyTensorImpl(Tensor* p_dst, const Tensor& src,
                                 void* exec_stream, CallbackFunc cb,
                                 void* cb_params) {
  Tensor& dst = *p_dst;
  dst.Reshape(src.shape());
  PADDLE_ENFORCE(
      src.place() == PlaceType::kCPU || src.place() == PlaceType::kGPU,
      paddle::platform::errors::InvalidArgument(
          "CopyTensor only support PlaceType kCPU/kGPU now."));
  PADDLE_ENFORCE(
      dst.place() == PlaceType::kCPU || dst.place() == PlaceType::kGPU,
      paddle::platform::errors::InvalidArgument(
          "CopyTensor only support PlaceType kCPU/kGPU now."));
  // copy to cpu, gpu => cpu or cpu => cpu
  if (dst.place() == PlaceType::kCPU) {
    switch (src.type()) {
      case PaddleDType::INT32:
        src.CopyToCpuImpl(dst.mutable_data<int32_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::INT64:
        src.CopyToCpuImpl(dst.mutable_data<int64_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::FLOAT32:
        src.CopyToCpuImpl(dst.mutable_data<float>(PlaceType::kCPU), exec_stream,
                          cb, cb_params);
        break;
      case PaddleDType::UINT8:
        src.CopyToCpuImpl(dst.mutable_data<uint8_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::INT8:
        src.CopyToCpuImpl(dst.mutable_data<int8_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::FLOAT16:
        src.CopyToCpuImpl(
            dst.mutable_data<paddle::platform::float16>(PlaceType::kCPU),
            exec_stream, cb, cb_params);
        break;
      default:
        PADDLE_THROW(paddle::platform::errors::Unimplemented(
            "Only INT32, INT64, UINT8, INT8, FLOAT16 and "
            "FLOAT32 is supported in Tensor. Others not implements"));
    }
    // gpu => gpu or cpu => gpu
  } else {
#if defined(PADDLE_WITH_CUDA)
    void* dst_data = nullptr;
    void* src_data = nullptr;
    size_t data_len = 0;
    int data_size = 0;
    PlaceType src_place;
    switch (src.type()) {
      case PaddleDType::INT32:
        dst_data =
            static_cast<void*>(dst.mutable_data<int32_t>(PlaceType::kGPU));
        src_data =
            static_cast<void*>(src.data<int32_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int32_t);
        break;
      case PaddleDType::INT64:
        dst_data =
            static_cast<void*>(dst.mutable_data<int64_t>(PlaceType::kGPU));
        src_data =
            static_cast<void*>(src.data<int64_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int64_t);
        break;
      case PaddleDType::FLOAT32:
        dst_data = static_cast<void*>(dst.mutable_data<float>(PlaceType::kGPU));
        src_data = static_cast<void*>(src.data<float>(&src_place, &data_size));
        data_len = data_size * sizeof(float);
        break;
      case PaddleDType::UINT8:
        dst_data =
            static_cast<void*>(dst.mutable_data<uint8_t>(PlaceType::kGPU));
        src_data =
            static_cast<void*>(src.data<uint8_t>(&src_place, &data_size));
        data_len = data_size * sizeof(uint8_t);
        break;
      case PaddleDType::INT8:
        dst_data =
            static_cast<void*>(dst.mutable_data<int8_t>(PlaceType::kGPU));
        src_data = static_cast<void*>(src.data<int8_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int8_t);
        break;
      case PaddleDType::FLOAT16:
        dst_data = static_cast<void*>(
            dst.mutable_data<paddle::platform::float16>(PlaceType::kGPU));
        src_data = static_cast<void*>(
            src.data<paddle::platform::float16>(&src_place, &data_size));
        data_len = data_size * 2;
        break;
      default:
        PADDLE_THROW(paddle::platform::errors::Unimplemented(
            "Only INT32, INT64, UINT8, INT8, FLOAT16 and "
            "FLOAT32 is supported in Tensor. Others not implements"));
    }

    paddle::platform::DeviceContextPool& pool =
        paddle::platform::DeviceContextPool::Instance();
    paddle::platform::CUDAPlace gpu_place(dst.device_);
    auto* dev_ctx = static_cast<const paddle::platform::CUDADeviceContext*>(
        pool.Get(gpu_place));

    if (src.place() == PlaceType::kCPU) {
      paddle::memory::Copy(gpu_place, static_cast<void*>(dst_data),
                           paddle::platform::CPUPlace(), src_data, data_len,
                           dev_ctx->stream());
    } else {
      paddle::memory::Copy(gpu_place, static_cast<void*>(dst_data),
                           paddle::platform::CUDAPlace(), src_data, data_len,
                           dev_ctx->stream());
    }

    if (nullptr != exec_stream) {
      *(static_cast<cudaStream_t*>(exec_stream)) = dev_ctx->stream();
    } else if (cb) {
      cudaLaunchHostFunc(dev_ctx->stream(), cb, cb_params);
    } else {
      cudaStreamSynchronize(dev_ctx->stream());
    }
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Can not copy tensor to GPU CUDA place because paddle is not compiled "
        "with CUDA."));
#endif
  }
  return;
}

void TensorUtils::CopyTensor(Tensor* p_dst, const Tensor& src) {
  CopyTensorImpl(p_dst, src, nullptr, nullptr, nullptr);
}

void TensorUtils::CopyTensorAsync(Tensor* p_dst, const Tensor& src,
                                  void* exec_stream) {
  CopyTensorImpl(p_dst, src, exec_stream, nullptr, nullptr);
}

void TensorUtils::CopyTensorAsync(Tensor* p_dst, const Tensor& src,
                                  CallbackFunc cb, void* cb_params) {
  CopyTensorImpl(p_dst, src, nullptr, cb, cb_params);
}

}  // namespace contrib
}  // namespace paddle_infer
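Note: the sketch below is not part of this commit; it only illustrates how the exec_stream overload of CopyTensorAsync might be driven from user code, assuming two existing predictors and a GPU-resident destination tensor. The function name and setup are hypothetical.

// Hypothetical calling pattern, not from this commit: copy the first output of
// one predictor into the first input of another, then wait on the CUDA stream
// that CopyTensorImpl reports back through exec_stream (GPU destination case).
#include <cuda_runtime.h>
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"

void ChainPredictors(paddle_infer::Predictor* producer,
                     paddle_infer::Predictor* consumer) {
  using paddle_infer::contrib::TensorUtils;
  auto src = producer->GetOutputHandle(producer->GetOutputNames()[0]);
  auto dst = consumer->GetInputHandle(consumer->GetInputNames()[0]);

  cudaStream_t copy_stream = nullptr;
  // For a GPU destination, the implementation above writes the stream it
  // enqueued the copy on into exec_stream instead of synchronizing itself.
  TensorUtils::CopyTensorAsync(dst.get(), *src,
                               static_cast<void*>(&copy_stream));
  if (copy_stream != nullptr) {
    cudaStreamSynchronize(copy_stream);
  }
}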
@@ -0,0 +1,40 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle_infer {
namespace contrib {

class TensorUtils {
 public:
  static void* CudaMallocPinnedMemory(size_t size);
  static void CudaFreePinnedMemory(void* mem);

  static void CopyTensor(Tensor* p_dst, const Tensor& src);
  static void CopyTensorAsync(Tensor* p_dst, const Tensor& src,
                              void* exec_stream);
  static void CopyTensorAsync(Tensor* p_dst, const Tensor& src, CallbackFunc cb,
                              void* cb_params);

 private:
  static void CopyTensorImpl(Tensor* p_dst, const Tensor& src,
                             void* exec_stream, CallbackFunc cb,
                             void* cb_params);
};

}  // namespace contrib
}  // namespace paddle_infer
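Note: the snippet below is likewise only an illustrative sketch, not part of the commit; the buffer size and element type are assumptions. It shows the intended pairing of the two pinned-memory helpers declared above: CudaMallocPinnedMemory returns a page-locked host buffer (or nullptr in a non-CUDA build), and CudaFreePinnedMemory releases it.

// Illustrative sketch, not from this commit: allocate a page-locked host
// buffer through the contrib helpers so host/device transfers can overlap
// with compute, then release it when done.
#include <cstddef>
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"

int main() {
  using paddle_infer::contrib::TensorUtils;
  const size_t num_elements = 1024;  // assumed buffer size
  const size_t num_bytes = num_elements * sizeof(float);

  // Backed by cudaMallocHost when Paddle is built with CUDA; returns nullptr
  // otherwise (see the implementation above).
  float* pinned =
      static_cast<float*>(TensorUtils::CudaMallocPinnedMemory(num_bytes));
  if (pinned != nullptr) {
    // ... stage input/output data in the pinned buffer here ...
    TensorUtils::CudaFreePinnedMemory(static_cast<void*>(pinned));
  }
  return 0;
}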