[dy2s] speed up PartialProgram.__call__ (PaddlePaddle#58771)
* move Tensor construction to cpp

* mv _remove_no_value to ASTStaticFunction

* update
feifei-111 authored and SecretXV committed Nov 28, 2023
1 parent db3d9d5 commit 44721a7
Showing 11 changed files with 328 additions and 107 deletions.
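
The core of the change is visible in eager_utils.cc below: instead of constructing each output Tensor on the Python side, PartialProgram.__call__ can ask C++ to materialize empty output Tensors directly from VarDesc (legacy IR) or OpResult (PIR) metadata. A minimal sketch of that construction pattern, reduced to the DenseTensor case; the includes and the helper name MakeEmptyTensor are illustrative, not part of the diff:

// Sketch only: build a paddle::Tensor that carries dtype/shape metadata but no
// allocation yet, mirroring the CreateTensorFromVarDesc / CreateTensorFromOpResult
// paths added below. Includes are an assumption, not copied from the commit.
#include <memory>
#include <string>
#include <vector>

#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"

paddle::Tensor MakeEmptyTensor(phi::DataType dtype,
                               const std::vector<int64_t>& dims,
                               const std::string& name) {
  paddle::Tensor tensor;
  tensor.set_name(name);
  // An empty Allocation means "shape and dtype are known, storage is not yet
  // allocated"; real memory is filled in later when the program actually runs.
  auto dense = std::make_shared<phi::DenseTensor>(
      std::make_shared<phi::Allocation>(),
      phi::DenseTensorMeta(dtype, phi::make_ddim(dims)));
  tensor.set_impl(dense);
  return tensor;
}
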
3 changes: 0 additions & 3 deletions paddle/fluid/eager/to_static/run_program_op_func.h
@@ -165,7 +165,6 @@ inline void run_program_ad_func(
  auto x_names =
      PADDLE_GET_CONST(std::vector<std::string>, attrs.at("x_names"));

  egr::EagerUtils::PassStopGradient(false, &p_autograd_outs);
  // Create GradOpNode (1 means [out_grad], 2 means [x_grad, paramx_grad])
  auto grad_node = std::make_shared<GradNodeRunProgram>(1, 2);

@@ -270,8 +269,6 @@ inline void pir_run_program_ad_func(
  PirRunProgramAPI(
      x, params, out, middles, step_scope, require_any_grad, attrs);
  if (!is_test && require_any_grad) {
    egr::EagerUtils::PassStopGradient(false, &p_autograd_outs);

    // Set Attributes
    grad_node->SetAttrMap(attrs);

198 changes: 198 additions & 0 deletions paddle/fluid/pybind/eager_utils.cc
@@ -20,6 +20,7 @@ limitations under the License. */
#include <string>
#include <vector>

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/hooks.h"
@@ -30,9 +31,12 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
#include "paddle/fluid/pir/dialect/operator/utils/utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/op_function_common.h"
#include "paddle/fluid/pybind/pir.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/common/data_type.h"
@@ -41,6 +45,7 @@ limitations under the License. */
#include "paddle/phi/core/distributed/auto_parallel/placement_types.h"
#include "paddle/phi/core/distributed/auto_parallel/process_mesh.h"
#include "paddle/phi/core/flags.h"
#include "paddle/pir/core/attribute.h"

PHI_DECLARE_bool(check_nan_inf);
PHI_DECLARE_int32(check_nan_inf_level);
@@ -1858,6 +1863,180 @@ std::vector<paddle::Tensor> GetTensorListFromPyObject(PyObject* obj,
paddle::Tensor& UnSafeGetTensorFromPyObject(PyObject* obj) {
  return reinterpret_cast<TensorObject*>(obj)->tensor;
}

paddle::Tensor CreateTensorFromVarDesc(
    const paddle::framework::VarDesc& var_desc) {
  auto tensor = paddle::Tensor();

  auto dtype = var_desc.GetDataType();
  std::vector<int64_t> dims = var_desc.GetShape();

  auto var_type = var_desc.GetType();

  auto ddims = phi::make_ddim(dims);
  tensor.set_name(var_desc.Name());
  auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor);
  autograd_meta->SetPersistable(false);
  autograd_meta->SetStopGradient(var_desc.StopGradient());

  if (var_type == paddle::framework::proto::VarType::LOD_TENSOR) {
    // TODO(jiabin): Maybe support LOD later
    std::shared_ptr<phi::DenseTensor> dense_tensor = nullptr;
    if (dims.size() == 1 && dims[0] == 0) {
      std::shared_ptr<phi::Allocation> allocation_ptr = nullptr;
      dense_tensor = std::make_shared<phi::DenseTensor>(
          allocation_ptr,
          phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
                               ddims));
    } else {
      // TODO(dev): we need enhance check for ddims.
      dense_tensor = std::make_shared<phi::DenseTensor>(
          std::make_shared<phi::Allocation>(),
          phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
                               ddims));
    }
    tensor.set_impl(dense_tensor);
  } else if (var_type == paddle::framework::proto::VarType::SELECTED_ROWS) {
    std::shared_ptr<phi::SelectedRows> selected_rows_tensor =
        std::make_shared<phi::SelectedRows>();
    tensor.set_impl(selected_rows_tensor);
  }

  if (!autograd_meta->GetMutableGradNode()) {
    autograd_meta->SetGradNode(
        std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
  }

  return tensor;
}

PyObject* GetEmpytyTensorsWithVarDesc(PyObject* self, PyObject* args) {
  std::vector<paddle::Tensor> result;
  std::unordered_map<std::string, paddle::Tensor> out_tensor_map;

  auto var_desc_list = PyTuple_GetItem(args, 0);

  if (PyList_Check(var_desc_list)) {
    Py_ssize_t len = PyList_Size(var_desc_list);
    for (Py_ssize_t i = 0; i < len; i++) {
      auto var_desc = PyObjectCast<paddle::framework::VarDesc>(
          PyList_GetItem(var_desc_list, i));
      auto var_name = var_desc.Name();
      if (out_tensor_map.find(var_name) == out_tensor_map.end()) {
        paddle::Tensor tensor = CreateTensorFromVarDesc(var_desc);
        out_tensor_map[var_name] = tensor;
        result.emplace_back(tensor);
      } else {
        result.emplace_back(out_tensor_map[var_name]);
      }
    }
  } else if (PyTuple_Check(var_desc_list)) {
    Py_ssize_t len = PyTuple_Size(var_desc_list);
    for (Py_ssize_t i = 0; i < len; i++) {
      auto var_desc = PyObjectCast<paddle::framework::VarDesc>(
          PyTuple_GetItem(var_desc_list, i));
      auto var_name = var_desc.Name();
      if (out_tensor_map.find(var_name) == out_tensor_map.end()) {
        paddle::Tensor tensor = CreateTensorFromVarDesc(var_desc);
        out_tensor_map[var_name] = tensor;
        result.emplace_back(tensor);
      } else {
        result.emplace_back(out_tensor_map[var_name]);
      }
    }
  } else if (var_desc_list != Py_None) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Argument of CreateTensorsWithVarDesc must be list of VarDesc, but got "
        "%s",
        (reinterpret_cast<PyTypeObject*>(var_desc_list->ob_type))->tp_name));
  }
  return ToPyObject(result);
}

paddle::Tensor CreateTensorFromOpResult(const pir::OpResult& op_result) {
  auto tensor = paddle::Tensor();

  auto dims = phi::vectorize(GetOpResultDims(op_result));
  auto ddims = phi::make_ddim(dims);
  auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor);
  autograd_meta->SetPersistable(false);
  autograd_meta->SetStopGradient(
      GetOpResultBoolAttr(op_result, kAttrStopGradients));

  if (op_result.type().isa<paddle::dialect::DenseTensorType>()) {
    // TODO(jiabin): Maybe support LOD later
    std::shared_ptr<phi::DenseTensor> dense_tensor = nullptr;
    auto dtype = paddle::dialect::TransToPhiDataType(
        op_result.type().dyn_cast<paddle::dialect::DenseTensorType>().dtype());

    if (dims.size() == 1 && dims[0] == 0) {
      std::shared_ptr<phi::Allocation> allocation_ptr = nullptr;
      dense_tensor = std::make_shared<phi::DenseTensor>(
          allocation_ptr, phi::DenseTensorMeta(dtype, ddims));
    } else {
      // TODO(dev): we need enhance check for ddims.
      dense_tensor = std::make_shared<phi::DenseTensor>(
          std::make_shared<phi::Allocation>(),
          phi::DenseTensorMeta(dtype, ddims));
    }
    tensor.set_impl(dense_tensor);
  } else if (op_result.type().isa<paddle::dialect::SelectedRowsType>()) {
    std::shared_ptr<phi::SelectedRows> selected_rows_tensor =
        std::make_shared<phi::SelectedRows>();
    tensor.set_impl(selected_rows_tensor);
  }

  if (!autograd_meta->GetMutableGradNode()) {
    autograd_meta->SetGradNode(
        std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
  }

  return tensor;
}

PyObject* GetEmpytyTensorsWithOpResult(PyObject* self, PyObject* args) {
  std::vector<paddle::Tensor> result;
  std::unordered_map<pir::OpResult, paddle::Tensor> out_tensor_map;

  auto op_result_list = PyTuple_GetItem(args, 0);

  if (PyList_Check(op_result_list)) {
    Py_ssize_t len = PyList_Size(op_result_list);
    for (Py_ssize_t i = 0; i < len; i++) {
      auto op_result =
          PyObjectCast<pir::OpResult>(PyList_GetItem(op_result_list, i));
      if (out_tensor_map.find(op_result) == out_tensor_map.end()) {
        paddle::Tensor tensor = CreateTensorFromOpResult(op_result);
        out_tensor_map[op_result] = tensor;
        result.emplace_back(tensor);
      } else {
        result.emplace_back(out_tensor_map[op_result]);
      }
    }
  } else if (PyTuple_Check(op_result_list)) {
    Py_ssize_t len = PyTuple_Size(op_result_list);
    for (Py_ssize_t i = 0; i < len; i++) {
      auto op_result =
          PyObjectCast<pir::OpResult>(PyTuple_GetItem(op_result_list, i));
      if (out_tensor_map.find(op_result) == out_tensor_map.end()) {
        paddle::Tensor tensor = CreateTensorFromOpResult(op_result);
        out_tensor_map[op_result] = tensor;
        result.emplace_back(tensor);
      } else {
        result.emplace_back(out_tensor_map[op_result]);
      }
    }
  } else if (op_result_list != Py_None) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Argument of GetTensorsWithOpResultInArgs must be list of OpResult, "
        "but got "
        "%s",
        (reinterpret_cast<PyTypeObject*>(op_result_list->ob_type))->tp_name));
  }

  return ToPyObject(result);
}

paddle::experimental::Scalar CastNumpy2Scalar(PyObject* obj,
                                              const std::string& op_type,
                                              ssize_t arg_pos) {
@@ -2484,5 +2663,24 @@ void DistTensorConverter::operator()(paddle::optional<std::vector<Tensor>>* x) {
}
}

static PyMethodDef EagerUtilMethods[] = {
    {"create_empty_tensors_with_var_descs",
     (PyCFunction)(void (*)(void))GetEmpytyTensorsWithVarDesc,
     METH_VARARGS,
     "GetEmpytyTensorsWithVarDesc"},
    {"create_empty_tensors_with_op_results",
     (PyCFunction)(void (*)(void))GetEmpytyTensorsWithOpResult,
     METH_VARARGS,
     "GetEmpytyTensorsWithOpResult."},
    {nullptr, nullptr, 0, nullptr}};

void BindEagerUtils(PyObject* module) {
  if (PyModule_AddFunctions(module, EagerUtilMethods) < 0) {
    PADDLE_THROW(platform::errors::Fatal(
        "Init Paddle error in BindEagerUtils(PyModule_AddFunctions)."));
    return;
  }
}

} // namespace pybind
} // namespace paddle
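
A note on the registration above: the two helpers are exposed as raw CPython METH_VARARGS functions and attached to the existing libpaddle module with PyModule_AddFunctions, so a call from Python does not pass through pybind11's argument-conversion layer (presumably part of the speedup, though the commit message does not say so explicitly). A self-contained sketch of that pattern; "demo_ext" and "make_empty_list" are hypothetical names, not part of the diff:

// Registration sketch only, under the assumptions named above.
#include <Python.h>

#include <pybind11/pybind11.h>

static PyObject* MakeEmptyList(PyObject* self, PyObject* args) {
  // The real helpers parse a list/tuple of VarDesc or OpResult out of `args`
  // and return a list of empty Tensors; this stub just returns an empty list.
  return PyList_New(0);
}

static PyMethodDef DemoMethods[] = {
    {"make_empty_list",
     (PyCFunction)(void (*)(void))MakeEmptyList,
     METH_VARARGS,
     "Return a new, empty Python list."},
    {nullptr, nullptr, 0, nullptr}};

PYBIND11_MODULE(demo_ext, m) {
  // Same call BindEagerUtils makes above: attach raw CPython functions to an
  // already-created pybind11 module object.
  if (PyModule_AddFunctions(m.ptr(), DemoMethods) < 0) {
    throw pybind11::error_already_set();
  }
}
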
18 changes: 18 additions & 0 deletions paddle/fluid/pybind/eager_utils.h
@@ -54,6 +54,18 @@ namespace pybind {

namespace py = ::pybind11;

template <typename T>
static T PyObjectCast(PyObject* obj) {
  try {
    return py::cast<T>(py::handle(obj));
  } catch (py::cast_error&) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Python object is not type of %s, the real type is %s",
        typeid(T).name(),
        obj->ob_type->tp_name));
  }
}

int TensorDtype2NumpyDtype(phi::DataType dtype);

bool PyObject_CheckLongOrConvertToLong(PyObject** obj);
@@ -381,6 +393,10 @@ std::vector<paddle::Tensor> GetTensorListFromPyObject(PyObject* obj,
bool allow_none = false);
paddle::Tensor& UnSafeGetTensorFromPyObject(PyObject* obj);

PyObject* GetEmpytyTensorsWithVarDesc(PyObject* self, PyObject* args);

PyObject* GetEmpytyTensorsWithOpResult(PyObject* self, PyObject* args);

// end of Slice related methods

std::vector<paddle::framework::Scope*> GetScopePtrListFromArgs(
@@ -468,5 +484,7 @@ void ConvertAllInputsToDistTensor(const phi::distributed::ProcessMesh* mesh,
}

void ConvertToDistTensor(Tensor* x, const phi::distributed::ProcessMesh* mesh);
void BindEagerUtils(PyObject* module);

} // namespace pybind
} // namespace paddle
10 changes: 0 additions & 10 deletions paddle/fluid/pybind/imperative.cc
@@ -72,16 +72,6 @@ std::atomic<int> VarBaseUniqueNameID{0};

namespace py = ::pybind11;

template <typename T>
static T PyObjectCast(PyObject *obj) {
  try {
    return py::cast<T>(py::handle(obj));
  } catch (py::cast_error &) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Python object is not type of %s", typeid(T).name()));
  }
}

class PyVariableWrapperHook : public imperative::VariableWrapperHook {
 public:
  explicit PyVariableWrapperHook(PyObject *func) : py_func_(func) {
7 changes: 7 additions & 0 deletions paddle/fluid/pybind/pir.h
@@ -15,9 +15,16 @@
#pragma once

#include <pybind11/pybind11.h>
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/pir/core/op_result.h"

namespace paddle {
namespace pybind {
using pir::OpResult;
void BindPir(pybind11::module *m);
phi::DataType GetOpResultDtype(const OpResult &result);
const phi::DDim &GetOpResultDims(const OpResult &result);
bool GetOpResultBoolAttr(const OpResult &self, const std::string &attr_name);
} // namespace pybind
} // namespace paddle
13 changes: 1 addition & 12 deletions paddle/fluid/pybind/pybind.cc
@@ -526,18 +526,6 @@ static PyObject *GetPythonAttribute(PyObject *obj, const char *attr_name) {
}
}

template <typename T>
static T PyObjectCast(PyObject *obj) {
  try {
    return py::cast<T>(py::handle(obj));
  } catch (py::cast_error &) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Python object is not type of %s, the real type is %s",
        typeid(T).name(),
        obj->ob_type->tp_name));
  }
}

using PyNameVarBaseMap = std::unordered_map<std::string, py::handle>;

static std::vector<std::shared_ptr<imperative::VarBase>> GetVarBaseList(
Expand Down Expand Up @@ -814,6 +802,7 @@ PYBIND11_MODULE(libpaddle, m) {
  BindJit(&m);
  BindEvalFrame(&m);
  BindCustomDevicePy(&m);
  BindEagerUtils(m.ptr());

  // Not used, just make sure cpu_info.cc is linked.
  phi::backends::cpu::CpuTotalPhysicalMemory();