From 70a4c7f71d11abb5aaf4aa5894ee5ee0659d11ae Mon Sep 17 00:00:00 2001
From: liuyuang
Date: Thu, 20 Jan 2022 10:04:18 +0800
Subject: [PATCH 01/10] add dist model tensor wrapper

---
 .../dist_model_tensor_wrapper.cc | 42 ++++++++++++++
 .../dist_model_tensor_wrapper.h  | 55 +++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc
 create mode 100644 paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h

diff --git a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc
new file mode 100644
index 0000000000000..9c7a3b7a0e989
--- /dev/null
+++ b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc
@@ -0,0 +1,42 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace distributed {
+
+void DistModelDataBuf::Reset(void *data, size_t length) {
+  Free();
+  memory_owned_ = false;
+  data_ = data;
+  length_ = length;
+}
+
+void DistModelDataBuf::Free() {
+  if (memory_owned_ && data_) {
+    PADDLE_ENFORCE_GT(length_, 0UL,
+                      platform::errors::PreconditionNotMet(
+                          "Error occurred when deconstruct DistModelDataBuf: "
+                          "it contains no data!"));
+    // NOTE: if own the memory, it must be char* type
+    delete[] static_cast<char *>(data_);
+    data_ = nullptr;
+    length_ = 0;
+  }
+}
+
+}  // namespace distributed
+}  // namespace paddle
diff --git a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h
new file mode 100644
index 0000000000000..8899c6885829c
--- /dev/null
+++ b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h
@@ -0,0 +1,55 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
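+//
+// This header declares the wrapper types used to pass inference inputs and
+// outputs across the DistModel interface:
+//   - DistModelDataType: the element types supported by the wrapper.
+//   - DistModelDataBuf: a small buffer holder that either owns its memory
+//     (allocated as char[] and released in Free()) or borrows an external
+//     pointer handed in through Reset().
+//   - DistModelTensor: name + shape + data buffer + element type + LoD info.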
+ +#pragma once +#include +#include +#include "paddle/fluid/platform/macros.h" + +namespace paddle { +namespace distributed { + +enum DistModelDataType { FLOAT16, FLOAT32, INT64, INT32, INT8 }; + +class DistModelDataBuf { + public: + explicit DistModelDataBuf(size_t length) + : data_(new char[length]), length_(length), memory_owned_(true) {} + DistModelDataBuf(void* data, size_t length) + : data_(data), length_(length), memory_owned_(false) {} + void Reset(void* data, size_t length); + size_t length() const { return length_; } + void* data() const { return data_; } + ~DistModelDataBuf() { Free(); } + DistModelDataBuf() = default; + + private: + // TODO(fleet exe dev): support copy and assign later + DISABLE_COPY_AND_ASSIGN(DistModelDataBuf); + void Free(); + void* data_{nullptr}; + size_t length_{0}; + bool memory_owned_{false}; +}; + +struct DistModelTensor { + std::string name; + std::vector shape; + DistModelDataBuf data; + DistModelDataType type; + std::vector> lod; +}; + +} // namespace distributed +} // namespace paddle From 1f69ec442abf2b183a5f16edeb11ada8fbcecc5c Mon Sep 17 00:00:00 2001 From: liuyuang Date: Thu, 20 Jan 2022 10:52:38 +0800 Subject: [PATCH 02/10] add assgin and copy method --- .../dist_model_tensor_wrapper.cc | 62 ++++++++++++++++++- .../dist_model_tensor_wrapper.h | 8 ++- paddle/fluid/pybind/bind_fleet_executor.cc | 13 ++++ 3 files changed, 79 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc index 9c7a3b7a0e989..b440d39c73a70 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc +++ b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc @@ -18,7 +18,7 @@ namespace paddle { namespace distributed { -void DistModelDataBuf::Reset(void *data, size_t length) { +void DistModelDataBuf::Reset(void* data, size_t length) { Free(); memory_owned_ = false; data_ = data; @@ -32,11 +32,69 @@ void DistModelDataBuf::Free() { "Error occurred when deconstruct DistModelDataBuf: " "it contains no data!")); // NOTE: if own the memory, it must be char* type - delete[] static_cast(data_); + delete[] static_cast(data_); data_ = nullptr; length_ = 0; } } +void DistModelDataBuf::Resize(size_t length) { + if (length_ >= length) { + return; + } + if (memory_owned_) { + Free(); + data_ = new char[length]; + length_ = length; + memory_owned_ = true; + } else { + PADDLE_THROW(platform::errors::PreconditionNotMet( + "The memory is allocated externally, can not Resized")); + } +} + +DistModelDataBuf& DistModelDataBuf::operator=(const DistModelDataBuf& other) { + if (!other.memory_owned_) { + data_ = other.data_; + length_ = other.length_; + memory_owned_ = other.memory_owned_; + } else { + Resize(other.length_); + if (other.length() && other.data()) { + std::memcpy(data_, other.data(), other.length()); + } else if (other.length()) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Invalid argument, null pointer data with length %u is passed", + other.length())); + } + length_ = other.length_; + memory_owned_ = true; + } + return *this; +} + +DistModelDataBuf& DistModelDataBuf::operator=(DistModelDataBuf&& other) { + data_ = other.data_; + memory_owned_ = other.memory_owned_; + length_ = other.length_; + other.data_ = nullptr; + other.length_ = 0; + other.memory_owned_ = false; + return *this; +} + +DistModelDataBuf::DistModelDataBuf(DistModelDataBuf&& other) + : data_(other.data_), + length_(other.length_), + 
memory_owned_(other.memory_owned_) { + other.memory_owned_ = false; + other.data_ = nullptr; + other.length_ = 0; +} + +DistModelDataBuf::DistModelDataBuf(const DistModelDataBuf& other) { + *this = other; +} + } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h index 8899c6885829c..a06677d581ddf 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h +++ b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h @@ -34,9 +34,13 @@ class DistModelDataBuf { ~DistModelDataBuf() { Free(); } DistModelDataBuf() = default; + DistModelDataBuf& operator=(const DistModelDataBuf& other); + DistModelDataBuf& operator=(DistModelDataBuf&& other); + DistModelDataBuf(DistModelDataBuf&& other); + DistModelDataBuf(const DistModelDataBuf& other); + private: - // TODO(fleet exe dev): support copy and assign later - DISABLE_COPY_AND_ASSIGN(DistModelDataBuf); + void Resize(size_t length); void Free(); void* data_{nullptr}; size_t length_{0}; diff --git a/paddle/fluid/pybind/bind_fleet_executor.cc b/paddle/fluid/pybind/bind_fleet_executor.cc index 08f8aec52883f..3d3b83aa30d10 100644 --- a/paddle/fluid/pybind/bind_fleet_executor.cc +++ b/paddle/fluid/pybind/bind_fleet_executor.cc @@ -14,7 +14,10 @@ #include "paddle/fluid/pybind/bind_fleet_executor.h" #include +#include +#include #include "paddle/fluid/distributed/fleet_executor/dist_model.h" +#include "paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h" #include "paddle/fluid/distributed/fleet_executor/fleet_executor.h" #include "paddle/fluid/distributed/fleet_executor/task_node.h" #include "paddle/fluid/framework/operator.h" @@ -31,6 +34,8 @@ using paddle::distributed::FleetExecutor; using paddle::distributed::TaskNode; using paddle::distributed::DistModelConfig; using paddle::distributed::DistModel; +using paddle::distributed::DistModelDataBuf; +using paddle::distributed::DistModelTensor; using paddle::framework::OpDesc; using paddle::framework::ProgramDesc; @@ -78,6 +83,14 @@ void BindFleetExecutor(py::module* m) { .def(py::init()) .def("init", &DistModel::Init) .def("run", &DistModel::Run, py::call_guard()); + + py::class_(*m, "DistModelDataBuf") + .def(py::init()) + .def(py::init([](std::vector& data) { + auto buf = DistModelDataBuf(data.size() * sizeof(float)); + std::memcpy(buf.data(), static_cast(data.data()), buf.length()); + return buf; + })) } } // namespace pybind } // namespace paddle From a3889204f75a50006f58991847a2a86fa80d8bb6 Mon Sep 17 00:00:00 2001 From: liuyuang Date: Thu, 20 Jan 2022 11:55:56 +0800 Subject: [PATCH 03/10] add pybind --- .../dist_model_tensor_wrapper.h | 22 +++- paddle/fluid/pybind/bind_fleet_executor.cc | 106 ++++++++++++++++++ 2 files changed, 126 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h index a06677d581ddf..4a04633388af2 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h +++ b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h @@ -22,6 +22,24 @@ namespace distributed { enum DistModelDataType { FLOAT16, FLOAT32, INT64, INT32, INT8 }; +template +constexpr DistModelDataType DistModelGetDtype(); + +template <> +constexpr DistModelDataType DistModelGetDtype() { + return DistModelDataType::INT32; +} + +template <> +constexpr DistModelDataType 
DistModelGetDtype() { + return DistModelDataType::INT64; +} + +template <> +constexpr DistModelDataType DistModelGetDtype() { + return DistModelDataType::FLOAT32; +} + class DistModelDataBuf { public: explicit DistModelDataBuf(size_t length) @@ -33,6 +51,7 @@ class DistModelDataBuf { void* data() const { return data_; } ~DistModelDataBuf() { Free(); } DistModelDataBuf() = default; + void Resize(size_t length); DistModelDataBuf& operator=(const DistModelDataBuf& other); DistModelDataBuf& operator=(DistModelDataBuf&& other); @@ -40,7 +59,6 @@ class DistModelDataBuf { DistModelDataBuf(const DistModelDataBuf& other); private: - void Resize(size_t length); void Free(); void* data_{nullptr}; size_t length_{0}; @@ -51,7 +69,7 @@ struct DistModelTensor { std::string name; std::vector shape; DistModelDataBuf data; - DistModelDataType type; + DistModelDataType dtype; std::vector> lod; }; diff --git a/paddle/fluid/pybind/bind_fleet_executor.cc b/paddle/fluid/pybind/bind_fleet_executor.cc index 3d3b83aa30d10..efc5137b67637 100644 --- a/paddle/fluid/pybind/bind_fleet_executor.cc +++ b/paddle/fluid/pybind/bind_fleet_executor.cc @@ -36,9 +36,56 @@ using paddle::distributed::DistModelConfig; using paddle::distributed::DistModel; using paddle::distributed::DistModelDataBuf; using paddle::distributed::DistModelTensor; +using paddle::distributed::DistModelDataType; using paddle::framework::OpDesc; using paddle::framework::ProgramDesc; +template +DistModelDataBuf DistModelDataBufCreate( + py::array_t data) { + // accept numpy array directly + DistModelDataBuf buf(data.sizet() * sizeof(T)); + std::copy_n(static_cast(data.data()), data.size(), + static_cast(buf.data())); + return buf; +} + +template +void DistModelDataBufReset( + DistModelDataBuf& buf, // NOLINT + py::array_t data) { // NOLINT + // reset the data with numpy array directly + buf.Resize(data.size() * sizeof(T)); + std::copy_n(static_cast(data.data()), data.sizer(), + static_cast(buf.data())); +} + +template +DistModelTensor DistModelTensorCreate( + py::array_t data, + const std::string name, const std::vector>& lod, + bool copy) { + DistModelTensor tensor; + + if (copy) { + DistModelDataBufBuf buf(data.size() * sizeof(T)); + std::copy_n(static_cast(data.data()), data.size(), + static_cast(buf.data())); + tensor.data = data; + } else { + tensor.data = + DistModelDataBuf(data.mutable_data(), data.size() * sizeof(T)); + } + + tensor.dtype = paddle::distributed::DistModelDataType(); + tensor.name = name; + tesnor.lod = lod; + tensor.shape.resize(data.ndim()); + std::copy_n(data.shape(), data.ndim(), tensor.shape.begin()); + + return tensor; +} + void BindFleetExecutor(py::module* m) { py::class_(*m, "FleetExecutor") .def(py::init()) @@ -91,6 +138,65 @@ void BindFleetExecutor(py::module* m) { std::memcpy(buf.data(), static_cast(data.data()), buf.length()); return buf; })) + .def(py::init(&DistModelDataBufCreate)) + .def(py::init(&DistModelDataBufCreate)) + .def(py::init(&DistModelDataBufCreate)) + .def("reset", + [](DistModelDataBuf& self, std::vector& data) { + self.Resize(data.size() * sizeof(float)); + std::memcpy(self.data(), data.data(), self.length()); + }) + .def("reset", &DistModelDataBufReset) + .def("reset", &DistModelDataBufReset) + .def("reset", &DistModelDataBufReset) + .def("length", &DistModelDataBuf::length) + .def("tolist", + [](DistModelDataBuf& self, const std::string& dtype) -> py::list { + py::list l; + if (dtype == "int32") { + auto* data = static_cast(self.data()); + auto size = self.length() / sizeof(int32_t); + l = 
py::cast(std::vector(data, data + size)); + } else if (dtype == "int64") { + auto* data = static_cast(self.data()); + auto size = self.length() / sizeof(int64_t); + l = py::cast(std::vector(data, data + size)); + } else if (dtype == "float32") { + auto* data = static_cast(self.data()); + auto size = self.length() / sizeof(float); + l = py::cast(std::vector(data, data + size)); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported data type. Now only supports INT32, INT64 and " + "FLOAT32.")); + } + return l; + }); + + py::class_(*m, "DistModelTensor") + .def(py::init<>()) + .def(py::init(&DistModelTensorCreate), py::arg("data"), + py::arg("name") = "", + py::arg("lod") = std::vector>(), + py::arg("copy") = true) + .def(py::init(&DistModelTensorCreate), py::arg("data"), + py::arg("name") = "", + py::arg("lod") = std::vector>(), + py::arg("copy") = true) + .def(py::init(&DistModelTensorCreate), py::arg("data"), + py::arg("name") = "", + py::arg("lod") = std::vector>(), + py::arg("copy") = true) + .def_readwrite("name", &DistModelTensor::name) + .def_readwrite("shape", &DistModelTensor::shape) + .def_readwrite("data", &DistModelTensor::data) + .def_readwrite("dtype", &DistModelTensor::dtype) + .def_readwrite("lod", &DistModelTensor::lod); + + py::enum_(*m, "DistModelDataType") + .value("FLOAT32", DistModelDataType::FLOAT32) + .value("INT64", DistModelDataType::INT64) + .value("INT32", DistModelDataType::INT32); } } // namespace pybind } // namespace paddle From 519ceeaee78ecfa7d632555ef70aeff7470a4c73 Mon Sep 17 00:00:00 2001 From: liuyuang Date: Thu, 20 Jan 2022 12:05:40 +0800 Subject: [PATCH 04/10] add ut for tensor --- .../tests/unittests/test_dist_model_tensor.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_dist_model_tensor.py diff --git a/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py new file mode 100644 index 0000000000000..eb2de783605a2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py @@ -0,0 +1,58 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
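#
# The tests below exercise the DistModelTensor / DistModelDataBuf bindings:
# tensors are built from int32, int64 and float32 numpy arrays, and the
# reported dtype, name, raw buffer contents (data.tolist) and buffer length
# are checked, along with refreshing the buffer through data.reset().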
+ +import unittest +import paddle +paddle.enable_static() +import numpy as np +from paddle.fluid.core import DistModelTensor +from paddle.fluid.core import DistModelDataType + + +class TestInferenceApi(unittest.TestCase): + def test_inference_api(self): + tensor32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') + dist_tensor32 = DistModelTensor(tensor32, '32_tensor') + dtype32 = dist_tensor32.dtype + self.assertEqual(dtype32, DistModelDataType.INT32) + self.assertEqual( + dist_tensor32.data.tolist('int32'), tensor32.ravel().tolist()) + self.assertEqual(dist_tensor32.data.length(), 40) + self.assertEqual(dist_tensor32.name, '32_tensor') + dist_tensor32.data.reset(tensor32) + + tensor64 = np.random.randint(10, 20, size=[20, 2]).astype('int64') + dist_tensor64 = DistModelTensor(tensor64, '64_tensor') + dtype64 = dist_tensor64.dtype + self.assertEqual(dtype64, DistModelDataType.INT64) + self.assertEqual( + dist_tensor64.data.tolist('int64'), tensor64.ravel().tolist()) + self.assertEqual(dist_tensor64.data.length(), 40) + self.assertEqual(dist_tensor64.name, '64_tensor') + dist_tensor64.data.reset(tensor64) + + tensor_float = np.random.randn(20, 2).astype('float32') + dist_tensor_float = DistModelTensor(tensor_float, 'float_tensor') + dtype_float = dist_tensor_float.dtype + self.assertEqual(dtype_float, DistModelDataType.FLOAT32) + self.assertEqual( + dist_tensor_float.data.tolist('float32'), + tensor_float.ravel().tolist()) + self.assertEqual(dist_tensor_float.data.length(), 40) + self.assertEqual(dist_tensor_float.name, 'float_tensor') + dist_tensor_float.data.reset(tensor_float) + + +if __name__ == '__main__': + unittest.main() From e5e2c9111c02978fe5663044f17b1fa8a25af6ad Mon Sep 17 00:00:00 2001 From: liuyuang Date: Thu, 20 Jan 2022 14:44:25 +0800 Subject: [PATCH 05/10] bug fix and add tondarray method --- paddle/fluid/pybind/bind_fleet_executor.cc | 46 ++++++++++++++++--- .../tests/unittests/test_dist_model_tensor.py | 6 +++ 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/pybind/bind_fleet_executor.cc b/paddle/fluid/pybind/bind_fleet_executor.cc index efc5137b67637..e1f8ec4bd7cb6 100644 --- a/paddle/fluid/pybind/bind_fleet_executor.cc +++ b/paddle/fluid/pybind/bind_fleet_executor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/pybind/bind_fleet_executor.h" +#include #include #include #include @@ -44,7 +45,7 @@ template DistModelDataBuf DistModelDataBufCreate( py::array_t data) { // accept numpy array directly - DistModelDataBuf buf(data.sizet() * sizeof(T)); + DistModelDataBuf buf(data.size() * sizeof(T)); std::copy_n(static_cast(data.data()), data.size(), static_cast(buf.data())); return buf; @@ -56,19 +57,19 @@ void DistModelDataBufReset( py::array_t data) { // NOLINT // reset the data with numpy array directly buf.Resize(data.size() * sizeof(T)); - std::copy_n(static_cast(data.data()), data.sizer(), + std::copy_n(static_cast(data.data()), data.size(), static_cast(buf.data())); } template DistModelTensor DistModelTensorCreate( py::array_t data, - const std::string name, const std::vector>& lod, + const std::string name, const std::vector>& lod, bool copy) { DistModelTensor tensor; if (copy) { - DistModelDataBufBuf buf(data.size() * sizeof(T)); + DistModelDataBuf buf(data.size() * sizeof(T)); std::copy_n(static_cast(data.data()), data.size(), static_cast(buf.data())); tensor.data = data; @@ -77,7 +78,7 @@ DistModelTensor DistModelTensorCreate( DistModelDataBuf(data.mutable_data(), data.size() * sizeof(T)); } - tensor.dtype = paddle::distributed::DistModelDataType(); + tensor.dtype = paddle::distributed::DistModelGetDtype(); tensor.name = name; tesnor.lod = lod; tensor.shape.resize(data.ndim()); @@ -86,6 +87,38 @@ DistModelTensor DistModelTensorCreate( return tensor; } +py::dtype DistModelTensorGetData(DistModelDataType dtype) { + py::dtype dt; + switch (dtype) { + case DistModelDataType::INT32: + dt = py::dtype::of(); + break; + case DistModelDataType::INT64: + dt = py::dtype::of(); + break; + case DistModelDataType::FLOAT32: + dt = py::dtype::of(); + break; + case DistModelDataType::INT8: + dt = py::dtype::of(); + break; + case DistModelDataType::FLOAT16: + dt = py::dtype::of(); + break; + default: + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported data type. 
Now only supports INT32, INT64, INT8, " + "FLOAT16 and FLOAT32.")); + } + + return dt; +} + +py::array DistModelTensorGetData(DistModelTensor& tensor) { // NOLINT + py::dtype dt = DistModelTypeToNumpyDType(tensor.dtype); + return py::array(std::move(dt), {tensor.shape}, tensor.data.data()); +} + void BindFleetExecutor(py::module* m) { py::class_(*m, "FleetExecutor") .def(py::init()) @@ -196,7 +229,8 @@ void BindFleetExecutor(py::module* m) { py::enum_(*m, "DistModelDataType") .value("FLOAT32", DistModelDataType::FLOAT32) .value("INT64", DistModelDataType::INT64) - .value("INT32", DistModelDataType::INT32); + .value("INT32", DistModelDataType::INT32) + .def("as_ndarray", &DistModelTensorGetData); } } // namespace pybind } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py index eb2de783605a2..466639fa3cc21 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py @@ -31,6 +31,8 @@ def test_inference_api(self): self.assertEqual(dist_tensor32.data.length(), 40) self.assertEqual(dist_tensor32.name, '32_tensor') dist_tensor32.data.reset(tensor32) + self.assertEqual(dist_tensor32.as_ndarray().ravel().tolist(), + tensor32.ravel().tolist()) tensor64 = np.random.randint(10, 20, size=[20, 2]).astype('int64') dist_tensor64 = DistModelTensor(tensor64, '64_tensor') @@ -41,6 +43,8 @@ def test_inference_api(self): self.assertEqual(dist_tensor64.data.length(), 40) self.assertEqual(dist_tensor64.name, '64_tensor') dist_tensor64.data.reset(tensor64) + self.assertEqual(dist_tensor64.as_ndarray().ravel().tolist(), + tensor32.ravel().tolist()) tensor_float = np.random.randn(20, 2).astype('float32') dist_tensor_float = DistModelTensor(tensor_float, 'float_tensor') @@ -52,6 +56,8 @@ def test_inference_api(self): self.assertEqual(dist_tensor_float.data.length(), 40) self.assertEqual(dist_tensor_float.name, 'float_tensor') dist_tensor_float.data.reset(tensor_float) + self.assertEqual(dist_tensor_float.as_ndarray().ravel().tolist(), + tensor32.ravel().tolist()) if __name__ == '__main__': From 473ca0642ef5a0e03c6ce6f02305da666b6079d2 Mon Sep 17 00:00:00 2001 From: liuyuang Date: Thu, 20 Jan 2022 14:53:22 +0800 Subject: [PATCH 06/10] bug fix --- paddle/fluid/pybind/bind_fleet_executor.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/pybind/bind_fleet_executor.cc b/paddle/fluid/pybind/bind_fleet_executor.cc index e1f8ec4bd7cb6..5bb29fa8a5cb1 100644 --- a/paddle/fluid/pybind/bind_fleet_executor.cc +++ b/paddle/fluid/pybind/bind_fleet_executor.cc @@ -72,7 +72,7 @@ DistModelTensor DistModelTensorCreate( DistModelDataBuf buf(data.size() * sizeof(T)); std::copy_n(static_cast(data.data()), data.size(), static_cast(buf.data())); - tensor.data = data; + tensor.data = std::move(buf); } else { tensor.data = DistModelDataBuf(data.mutable_data(), data.size() * sizeof(T)); @@ -80,14 +80,14 @@ DistModelTensor DistModelTensorCreate( tensor.dtype = paddle::distributed::DistModelGetDtype(); tensor.name = name; - tesnor.lod = lod; + tensor.lod = lod; tensor.shape.resize(data.ndim()); std::copy_n(data.shape(), data.ndim(), tensor.shape.begin()); return tensor; } -py::dtype DistModelTensorGetData(DistModelDataType dtype) { +py::dtype DistModelTypeToNumpyDType(DistModelDataType dtype) { py::dtype dt; switch (dtype) { case DistModelDataType::INT32: From 1f722289bab738effc22a16b5943b54c5bcee67d Mon Sep 17 
00:00:00 2001
From: liuyuang
Date: Thu, 20 Jan 2022 14:56:47 +0800
Subject: [PATCH 07/10] update dist model

---
 paddle/fluid/distributed/fleet_executor/dist_model.cc | 4 ++--
 paddle/fluid/distributed/fleet_executor/dist_model.h  | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.cc b/paddle/fluid/distributed/fleet_executor/dist_model.cc
index 310c809de7172..6454a34950513 100644
--- a/paddle/fluid/distributed/fleet_executor/dist_model.cc
+++ b/paddle/fluid/distributed/fleet_executor/dist_model.cc
@@ -355,8 +355,8 @@ bool DistModel::PrepareFeedAndFetch() {
   return true;
 }
 
-void DistModel::Run(const std::vector<framework::Tensor> &input_data,
-                    std::vector<framework::Tensor> *output_data) {
+void DistModel::Run(const std::vector<DistModelTensor> &input_data,
+                    std::vector<DistModelTensor> *output_data) {
   /* TODO(fleet exe dev): implement this funct */
 }
 
diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.h b/paddle/fluid/distributed/fleet_executor/dist_model.h
index d6dc554f158d3..96e9c018074b5 100644
--- a/paddle/fluid/distributed/fleet_executor/dist_model.h
+++ b/paddle/fluid/distributed/fleet_executor/dist_model.h
@@ -17,6 +17,7 @@
 #include <string>
 #include <vector>
 
+#include "paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h"
 #include "paddle/fluid/distributed/fleet_executor/fleet_executor_desc.pb.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/macros.h"
@@ -56,8 +57,8 @@ class DistModel {
  public:
   explicit DistModel(const DistModelConfig& config) : config_(config) {}
   bool Init();
-  void Run(const std::vector<framework::Tensor>& input_data,
-           std::vector<framework::Tensor>* output_data);
+  void Run(const std::vector<DistModelTensor>& input_data,
+           std::vector<DistModelTensor>* output_data);
   ~DistModel() = default;
 
  private:

From 843615a3b43dc362f6e2d2aabf6f88ae77b580fc Mon Sep 17 00:00:00 2001
From: liuyuang
Date: Thu, 20 Jan 2022 15:04:03 +0800
Subject: [PATCH 08/10] add cmake dependency

---
 paddle/fluid/distributed/fleet_executor/CMakeLists.txt     | 4 ++--
 .../paddle/fluid/tests/unittests/test_dist_model_tensor.py | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt
index 1e31187367bd3..3e734b1b9ed24 100644
--- a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt
+++ b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt
@@ -12,8 +12,8 @@ endif()
 cc_library(task_loop_thread_pool SRCS task_loop_thread_pool.cc task_loop_thread.cc task_loop.cc DEPS enforce glog)
 
-cc_library(fleet_executor SRCS fleet_executor.cc carrier.cc task_node.cc runtime_graph.cc dist_model.cc
-           interceptor.cc compute_interceptor.cc amplifier_interceptor.cc message_service.cc message_bus.cc
+cc_library(fleet_executor SRCS fleet_executor.cc carrier.cc task_node.cc runtime_graph.cc dist_model.cc interceptor.cc
+           compute_interceptor.cc amplifier_interceptor.cc message_service.cc message_bus.cc dist_model_tensor_wrapper.cc
            DEPS proto_desc fleet_executor_desc_proto interceptor_message_proto task_loop_thread_pool collective_helper op_registry executor_gc_helper gflags glog ${BRPC_DEPS})
 
diff --git a/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py
index 466639fa3cc21..851e940552376 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py
@@ -14,14 +14,15 @@
 import unittest
 import paddle
-paddle.enable_static()
 import numpy as np
 from paddle.fluid.core import
DistModelTensor from paddle.fluid.core import DistModelDataType +paddle.enable_static() + -class TestInferenceApi(unittest.TestCase): - def test_inference_api(self): +class TestDistModelTensor(unittest.TestCase): + def test_dist_model_tensor(self): tensor32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') dist_tensor32 = DistModelTensor(tensor32, '32_tensor') dtype32 = dist_tensor32.dtype From a51274c18cb2557c54d2dd94980c2ecd3598e111 Mon Sep 17 00:00:00 2001 From: liuyuang Date: Thu, 20 Jan 2022 15:19:23 +0800 Subject: [PATCH 09/10] bug fix --- paddle/fluid/pybind/bind_fleet_executor.cc | 6 +-- .../tests/unittests/test_dist_model_tensor.py | 38 +++++++++---------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/paddle/fluid/pybind/bind_fleet_executor.cc b/paddle/fluid/pybind/bind_fleet_executor.cc index 5bb29fa8a5cb1..450939dd0ff8b 100644 --- a/paddle/fluid/pybind/bind_fleet_executor.cc +++ b/paddle/fluid/pybind/bind_fleet_executor.cc @@ -224,13 +224,13 @@ void BindFleetExecutor(py::module* m) { .def_readwrite("shape", &DistModelTensor::shape) .def_readwrite("data", &DistModelTensor::data) .def_readwrite("dtype", &DistModelTensor::dtype) - .def_readwrite("lod", &DistModelTensor::lod); + .def_readwrite("lod", &DistModelTensor::lod) + .def("as_ndarray", &DistModelTensorGetData); py::enum_(*m, "DistModelDataType") .value("FLOAT32", DistModelDataType::FLOAT32) .value("INT64", DistModelDataType::INT64) - .value("INT32", DistModelDataType::INT32) - .def("as_ndarray", &DistModelTensorGetData); + .value("INT32", DistModelDataType::INT32); } } // namespace pybind } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py index 851e940552376..da25550c4f47e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_dist_model_tensor.py @@ -23,42 +23,40 @@ class TestDistModelTensor(unittest.TestCase): def test_dist_model_tensor(self): - tensor32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') - dist_tensor32 = DistModelTensor(tensor32, '32_tensor') - dtype32 = dist_tensor32.dtype - self.assertEqual(dtype32, DistModelDataType.INT32) + tensor_32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') + dist_tensor32 = DistModelTensor(tensor_32, '32_tensor') + self.assertEqual(dist_tensor32.dtype, DistModelDataType.INT32) self.assertEqual( - dist_tensor32.data.tolist('int32'), tensor32.ravel().tolist()) - self.assertEqual(dist_tensor32.data.length(), 40) + dist_tensor32.data.tolist('int32'), tensor_32.ravel().tolist()) + # the length is how many byte the data contains + self.assertEqual(dist_tensor32.data.length(), 40 * 4) self.assertEqual(dist_tensor32.name, '32_tensor') - dist_tensor32.data.reset(tensor32) + dist_tensor32.data.reset(tensor_32) self.assertEqual(dist_tensor32.as_ndarray().ravel().tolist(), - tensor32.ravel().tolist()) + tensor_32.ravel().tolist()) - tensor64 = np.random.randint(10, 20, size=[20, 2]).astype('int64') - dist_tensor64 = DistModelTensor(tensor64, '64_tensor') - dtype64 = dist_tensor64.dtype - self.assertEqual(dtype64, DistModelDataType.INT64) + tensor_64 = np.random.randint(10, 20, size=[20, 2]).astype('int64') + dist_tensor64 = DistModelTensor(tensor_64, '64_tensor') + self.assertEqual(dist_tensor64.dtype, DistModelDataType.INT64) self.assertEqual( - dist_tensor64.data.tolist('int64'), tensor64.ravel().tolist()) - self.assertEqual(dist_tensor64.data.length(), 40) + 
dist_tensor64.data.tolist('int64'), tensor_64.ravel().tolist()) + self.assertEqual(dist_tensor64.data.length(), 40 * 8) self.assertEqual(dist_tensor64.name, '64_tensor') - dist_tensor64.data.reset(tensor64) + dist_tensor64.data.reset(tensor_64) self.assertEqual(dist_tensor64.as_ndarray().ravel().tolist(), - tensor32.ravel().tolist()) + tensor_64.ravel().tolist()) tensor_float = np.random.randn(20, 2).astype('float32') dist_tensor_float = DistModelTensor(tensor_float, 'float_tensor') - dtype_float = dist_tensor_float.dtype - self.assertEqual(dtype_float, DistModelDataType.FLOAT32) + self.assertEqual(dist_tensor_float.dtype, DistModelDataType.FLOAT32) self.assertEqual( dist_tensor_float.data.tolist('float32'), tensor_float.ravel().tolist()) - self.assertEqual(dist_tensor_float.data.length(), 40) + self.assertEqual(dist_tensor_float.data.length(), 40 * 4) self.assertEqual(dist_tensor_float.name, 'float_tensor') dist_tensor_float.data.reset(tensor_float) self.assertEqual(dist_tensor_float.as_ndarray().ravel().tolist(), - tensor32.ravel().tolist()) + tensor_float.ravel().tolist()) if __name__ == '__main__': From c42fe4f4292be9a5884a37a48f5c49d65686360e Mon Sep 17 00:00:00 2001 From: liuyuang Date: Thu, 20 Jan 2022 16:15:30 +0800 Subject: [PATCH 10/10] remove the ut from win --- python/paddle/fluid/tests/unittests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 915af18a5702d..5c57d1a21bce6 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -152,6 +152,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) LIST(REMOVE_ITEM TEST_OPS test_fleet_executor_origin_scheduler) LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_mapper) LIST(REMOVE_ITEM TEST_OPS test_fleet_executor_task_node) + LIST(REMOVE_ITEM TEST_OPS test_dist_model_tensor) endif() # Temporally disable test_deprecated_decorator
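
For reference, a minimal usage sketch of the Python API introduced by this series (the variable names below are illustrative, and DistModel.Run is still a TODO stub at this point, so only the tensor wrapper itself is exercised):

    import numpy as np
    from paddle.fluid.core import DistModelTensor, DistModelDataType

    feed = np.arange(8, dtype='float32').reshape([4, 2])
    tensor = DistModelTensor(feed, 'x')           # copy=True by default: bytes are copied into a DistModelDataBuf
    assert tensor.name == 'x'
    assert tensor.dtype == DistModelDataType.FLOAT32
    assert tensor.data.length() == feed.size * 4  # length() counts bytes
    assert tensor.data.tolist('float32') == feed.ravel().tolist()
    assert tensor.as_ndarray().shape == (4, 2)    # rebuilds a numpy array from shape + buffer
    tensor.data.reset(feed)                       # copies fresh contents into the buffer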