Skip to content

Commit

Permalink
remove mkldnn tensor & polish details
Browse files Browse the repository at this point in the history
  • Loading branch information
chenwhql committed Oct 14, 2021
1 parent 06789ba commit 3f5f789
Show file tree
Hide file tree
Showing 23 changed files with 249 additions and 208 deletions.
2 changes: 1 addition & 1 deletion cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ function(find_fluid_modules TARGET_NAME)
endfunction(find_fluid_modules)

set_property(GLOBAL PROPERTY TCMPT_MODULES "")
# find all top modules is used for paddle static library
# find all tcmpt modules is used for paddle static library
# for building inference libs
function(find_tcmpt_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
Expand Down
9 changes: 7 additions & 2 deletions cmake/tcmpt.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
# TODO(chenweihang): keep message comment for debuging, remove it if needless
# `kernel_instantiate` function is used to declare the template instantiation of
# the Kernel function generated through code analysis, only for windows
# (because the windows platform msvc compiler cannot automatically instantiate
# the template function through decltype)
# TODO(chenweihang): keep message comment for debugging, it is still useful,
# I will remove it if needless later

function(kernel_instantiate TARGET)
set(target_file ${CURRENT_BINARY_DIR}/${TARGET}.tmp CACHE INTERNAL "${CURRENT_BINARY_DIR}/${TARGET} file")
set(target_file_final ${CURRENT_BINARY_DIR}/${TARGET})
Expand Down Expand Up @@ -36,7 +42,6 @@ function(kernel_instantiate TARGET)
endforeach()
# message(STATUS "INST CONTENT: ${instantiate_context}")
file(APPEND ${target_file} "${instantiate_context}\n")
# copy_if_different(${target_file} ${target_file_final})
string(REPLACE "." "_" cmd_name ${TARGET})
# this is a dummy target for custom command, should always be run firstly to update ${target_file_final}
# TODO(chenweihang): naming rule needs to be enhanced
Expand Down
44 changes: 0 additions & 44 deletions paddle/fluid/framework/eigen.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "unsupported/Eigen/CXX11/Tensor"

#include "paddle/tcmpt/core/dense_tensor.h"

namespace paddle {
namespace framework {

Expand Down Expand Up @@ -69,28 +67,6 @@ struct EigenTensor {
static ConstType From(const Tensor& tensor) {
return From(tensor, tensor.dims_);
}

// for pt::DenseTensor
static Type From(pt::DenseTensor& tensor, DDim dims) { // NOLINT
// why tensor.data<T>() not work?
// return Type(const_cast<T*>(reinterpret_cast<const T*>(tensor.data())),
// EigenDim<D>::From(dims));
return Type(const_cast<T*>(tensor.data<T>()), EigenDim<D>::From(dims));
}

static Type From(pt::DenseTensor& tensor) { // NOLINT
return From(tensor, tensor.dims());
} // NOLINT

static ConstType From(const pt::DenseTensor& tensor, DDim dims) {
// return ConstType(reinterpret_cast<const T*>(tensor.data()),
// EigenDim<D>::From(dims));
return ConstType(tensor.data<T>(), EigenDim<D>::From(dims));
}

static ConstType From(const pt::DenseTensor& tensor) {
return From(tensor, tensor.dims());
}
};

template <typename T, int MajorType = Eigen::RowMajor,
Expand Down Expand Up @@ -133,17 +109,6 @@ struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
const Tensor& tensor) { // NOLINT
return EigenVector::From(tensor, {product(tensor.dims_)});
}

// for pt::DenseTensor
static typename EigenVector::Type Flatten(
pt::DenseTensor& tensor) { // NOLINT
return EigenVector::From(tensor, {product(tensor.dims())});
}

static typename EigenVector::ConstType Flatten(
const pt::DenseTensor& tensor) { // NOLINT
return EigenVector::From(tensor, {product(tensor.dims())});
}
};

template <typename T, int MajorType = Eigen::RowMajor,
Expand All @@ -160,15 +125,6 @@ struct EigenScalar {
static ConstType From(const Tensor& tensor) {
return ConstType(tensor.data<T>());
}

// for pt::DenseTensor
static Type From(pt::DenseTensor& tensor) { // NOLINT
return Type(const_cast<T*>(tensor.data<T>()));
}

static ConstType From(const pt::DenseTensor& tensor) {
return ConstType(tensor.data<T>());
}
};

// Define Tensor with 32-bit index.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/program_desc.h"

USE_NO_KERNEL_OP(scale);
USE_OP(scale);
USE_OP(elementwise_mul);
USE_OP(elementwise_add);
USE_OP(elementwise_add_grad);
Expand Down
15 changes: 0 additions & 15 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1155,7 +1155,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// and RCOM backend, the XPU, NPU and MKLDNN will be supported in the second
// phase

// VLOG(1) << "Pt KernelFactory: " << pt::KernelFactory::Instance();
if (FLAGS_use_pt_kernel &&
pt::KernelFactory::Instance().ContainsKernel(type_.c_str())) {
if (pt_kernel_key_.get() == nullptr || pt_kernel_.get() == nullptr) {
Expand Down Expand Up @@ -1263,17 +1262,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
}
}

static bool ContainSelectedRows(const VariableValueMap& inputs) {
for (auto& var_pair : inputs) {
for (auto* var : var_pair.second) {
if (var->IsType<SelectedRows>()) {
return true;
}
}
}
return false;
}

// TODO(chenweihang): now only check single var input
static bool IsValidVar(const std::string& name,
const VariableValueMap& inputs) {
Expand Down Expand Up @@ -1303,9 +1291,6 @@ static pt::KernelName ConstructPtKernelName(const std::string& op_type,
const VariableValueMap& inputs) {
std::string overload_name;
// TODO(chenweihang): adapt SelectedRows by xiaowei's design
// if (ContainSelectedRows(inputs)) {
// overload_name = pt::kContainSelectedRowsSuffix;
// }
if (ContainHostTensor(op_proto, inputs)) {
if (overload_name != "") {
overload_name += ".";
Expand Down
38 changes: 1 addition & 37 deletions paddle/fluid/framework/tcmpt_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/tcmpt_utils.h"

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"

#include "paddle/fluid/framework/variable.h"
#include "paddle/tcmpt/api/include/core.h"
#include "paddle/tcmpt/api/include/symbols.h"

namespace paddle {
namespace framework {

// TODO(chenweihang, shixiaowei): adapt SelectedRows

template <>
std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor, LoDTensor>(
const LoDTensor& tensor, pt::Backend backend, pt::DataType dtype,
Expand Down Expand Up @@ -167,38 +164,5 @@ std::shared_ptr<pt::TensorInterface> OutputVariableToPtTensor(
return nullptr;
}

/* For MKLDNNDenseTensor (move this part into a single file later) */
#ifdef PADDLE_WITH_MKLDNN

template <>
std::shared_ptr<pt::MKLDNNDenseTensor> MakeTensorImpl<pt::MKLDNNDenseTensor>(
const Tensor& tensor, const platform::Place& place,
proto::VarType::Type type) {
auto holder = tensor.Holder();
auto tensor_impl = std::make_shared<pt::MKLDNNDenseTensor>(
pt::TensorMeta(tensor.dims(), pt::TransToPtBackend(place),
pt::TransToPtDataType(type),
pt::TransToPtLayout(tensor.layout()), tensor.offset()),
pt::TensorStatus());

if (holder != nullptr) {
tensor_impl->ShareAllocation(tensor.Holder());
} else {
VLOG(1) << "Old MKLDNN Tensor holder is nullptr.";
}

tensor_impl->set_format(tensor.format());
return tensor_impl;
}

template <>
void ShareTensorImpl(pt::MKLDNNDenseTensor* tensor_impl, Tensor* out) {
out->ResetHolderWithType(tensor_impl->allocation(),
pt::TransToProtoVarType(tensor_impl->type()));
out->set_format(tensor_impl->format());
}

#endif

} // namespace framework
} // namespace paddle
2 changes: 0 additions & 2 deletions paddle/fluid/framework/type_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ class BlockDesc;
class Variable;
class InferNoNeedBufferVarsFN;

// TODO(chenweihang): AttirbuteMap also need to be ordered
// TODO(panyx0718): Replace vector with something like gtl::Vector.
using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using VariableValueMap = std::map<std::string, std::vector<Variable*>>;
Expand All @@ -44,7 +43,6 @@ using Attribute = boost::variant<
std::vector<std::string>, bool, std::vector<bool>, BlockDesc*, int64_t,
std::vector<BlockDesc*>, std::vector<int64_t>, std::vector<double>>;

// TODO(chenweihang): AttirbuteMap also need to be ordered
using AttributeMap = std::unordered_map<std::string, Attribute>;

#ifdef PADDLE_WITH_ASCEND_CL
Expand Down
15 changes: 0 additions & 15 deletions paddle/fluid/imperative/prepared_operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,6 @@ static framework::VariableValueMap BuildInputMap(
return inputs;
}

template <typename VarType>
static bool ContainSelectedRows(const NameVarMap<VarType>& inputs) {
for (auto& var_pair : inputs) {
for (auto& var : var_pair.second) {
if (var->Var().template IsType<framework::SelectedRows>()) {
return true;
}
}
}
return false;
}

// TODO(chenweihang): enhance rules, not all dispensable inputs
// are host tensor, now only for scale kernel verify
template <typename VarType>
Expand All @@ -169,9 +157,6 @@ static pt::KernelName ConstructPtKernelName(
const NameVarMap<VarType>& inputs) {
std::string overload_name;
// TODO(chenweihang): adapt SelectedRows by xiaowei's design
// if (ContainSelectedRows<VarType>(inputs)) {
// overload_name = pt::kContainSelectedRowsSuffix;
// }
if (ContainHostTensor<VarType>(op_proto, inputs)) {
if (overload_name != "") {
overload_name += ".";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
using VarMsg = ::paddle::distributed::VariableMessage;
DECLARE_double(eager_delete_tensor_gb);

USE_NO_KERNEL_OP(scale);
USE_OP(scale);
USE_NO_KERNEL_OP(heter_listen_and_serv);

framework::BlockDesc* AppendSendAndRecvBlock(framework::ProgramDesc* program) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/pscore/heter_server_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace distributed = paddle::distributed;
using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
using VarMsg = ::paddle::distributed::VariableMessage;

USE_NO_KERNEL_OP(scale);
USE_OP(scale);

std::shared_ptr<distributed::HeterServer> b_rpc_service;

Expand Down
1 change: 0 additions & 1 deletion paddle/fluid/operators/scale_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ limitations under the License. */

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class ScaleXPUKernel : public framework::OpKernel<T> {
public:
Expand Down
3 changes: 1 addition & 2 deletions paddle/fluid/operators/sign_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <memory>

#include "paddle/fluid/operators/sign_op.h"
#include <memory>
#include "paddle/fluid/platform/float16.h"

namespace paddle {
Expand Down
1 change: 0 additions & 1 deletion paddle/tcmpt/api/include/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ limitations under the License. */
#include "paddle/tcmpt/core/dense_tensor.h"
#include "paddle/tcmpt/core/kernel_context.h"
#include "paddle/tcmpt/core/kernel_factory.h"
#include "paddle/tcmpt/core/mkldnn_dense_tensor.h"
#include "paddle/tcmpt/core/scalar.h"
56 changes: 0 additions & 56 deletions paddle/tcmpt/core/mkldnn_dense_tensor.h

This file was deleted.

1 change: 1 addition & 0 deletions paddle/tcmpt/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
if(WIN32)
set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/cpu)
kernel_instantiate(creation.cc)
kernel_instantiate(math.cc)
kernel_instantiate(linalg.cc)
endif()
Expand Down
1 change: 1 addition & 0 deletions paddle/tcmpt/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
if(WIN32)
set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/cuda)
kernel_instantiate(creation.cu)
kernel_instantiate(math.cu)
kernel_instantiate(linalg.cu)
endif()
Expand Down
20 changes: 2 additions & 18 deletions paddle/tcmpt/cuda/linalg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
#include "paddle/tcmpt/cuda/linalg.h"

#include "paddle/tcmpt/core/kernel_registry.h"
#include "paddle/tcmpt/eigen/dot.h"

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex.h"

namespace pt {
Expand All @@ -28,22 +27,7 @@ void Dot(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
out->mutable_data();
if (1 == out->dims().size()) {
auto eigen_out = paddle::framework::EigenScalar<T>::From(*out);
auto eigen_x = paddle::framework::EigenVector<T>::Flatten(x);
auto eigen_y = paddle::framework::EigenVector<T>::Flatten(y);

auto& dev = *dev_ctx.eigen_device();
eigen_out.device(dev) = (eigen_x * eigen_y).sum();
} else {
auto eigen_out = paddle::framework::EigenMatrix<T>::From(*out);
auto eigen_x = paddle::framework::EigenMatrix<T>::From(x);
auto eigen_y = paddle::framework::EigenMatrix<T>::From(y);

auto& dev = *dev_ctx.eigen_device();
eigen_out.device(dev) = (eigen_x * eigen_y).sum(Eigen::DSizes<int, 1>(1));
}
eigen::Dot<CUDAContext, T>(dev_ctx, x, y, out);
}

} // namespace pt
Expand Down
Loading

1 comment on commit 3f5f789

@paddle-bot-old
Copy link

@paddle-bot-old paddle-bot-old bot commented on 3f5f789 Oct 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🕵️ CI failures summary

🔍 PR: #34425 Commit ID: 3f5f789 contains failed CI.

🔹 Failed: PR-CI-APPROVAL

approve_failed
2021-10-15 10:25:33 正在保存至: “bk.txt”
2021-10-15 10:25:33 0K 100% 3.47M=0s
2021-10-15 10:25:33 2021-10-15 10:25:33 (3.47 MB/s) - 已保存 “bk.txt” [5/5])
2021-10-15 10:25:40 ****************
2021-10-15 10:25:40 0. You must have one RD (lanxianghit (Recommend), phlrain or luotao1) approval for changing the FLAGS, which manages the environment variables.
2021-10-15 10:25:40 1. You must have Dianhai approval for change 20+ files or add than 1000+ lines of content.
2021-10-15 10:25:40 2. You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1) approval for paddle/fluid/framework/operator.h, which manages the underlying code for fluid.
2021-10-15 10:25:40 3. You must have one RD (zhiqiu (Recommend) , phlrain) approval for the changes of paddle/fluid/pybind/op_function_generator.cc, which manages the logic of automatic generating op functions for dygraph.
2021-10-15 10:25:40 4. You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1) approval for the usage of const_cast.
2021-10-15 10:25:40 5. You must have one RD (Avin0323(Recommend) or zhouwei25 or wanghuancoder or luotao1) approval for modifying unity_build_rule.cmake which the rules of Unity Build.
2021-10-15 10:25:40 There are 6 approved errors.
2021-10-15 10:25:40 ****************
2021-10-15 10:25:40 + EXCODE=6
2021-10-15 10:25:40 + echo 'EXCODE: 6'
2021-10-15 10:25:40 EXCODE: 6
2021-10-15 10:25:40 + echo 'ipipe_log_param_EXCODE: 6'
2021-10-15 10:25:40 ipipe_log_param_EXCODE: 6
2021-10-15 10:25:40 + exit 6

🔹 Failed: PR-CI-OP-benchmark

test_failed
2021-10-15 10:28:31 + '[' 8 -ne 0 ']'
2021-10-15 10:28:31 + LOG '[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.'
2021-10-15 10:28:31 + echo '[tools/test_ci_op_benchmark.sh:275] [INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.'
2021-10-15 10:28:31 [tools/test_ci_op_benchmark.sh:275] [INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.
2021-10-15 10:28:31 + LOG '[INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.'
2021-10-15 10:28:31 + echo '[tools/test_ci_op_benchmark.sh:276] [INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.'
2021-10-15 10:28:31 [tools/test_ci_op_benchmark.sh:276] [INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.
2021-10-15 10:28:31 + exit 8
2021-10-15 10:28:31 + EXCODE=8
2021-10-15 10:28:31 + echo 'EXCODE: 8'
2021-10-15 10:28:31 EXCODE: 8
2021-10-15 10:28:31 + echo 'ipipe_log_param_EXCODE: 8'
2021-10-15 10:28:31 ipipe_log_param_EXCODE: 8
2021-10-15 10:28:31 + '[' 8 -eq 0 ']'
2021-10-15 10:28:31 + set +x
2021-10-15 10:28:31 Sorry, some tests failed.
2021-10-15 10:28:31 + exit 8
2021-10-15 10:28:31 {build code state=8}
2021-10-15 10:28:41 kill agent BUILD_CODE_FAIL

🔹 Failed: PR-CE-Framework

Unknown Failed
Unknown Failed

Please sign in to comment.