[Doc] remove execution_strategy doc #53668

Merged
109 changes: 6 additions & 103 deletions paddle/fluid/pybind/parallel_executor.cc
@@ -188,39 +188,7 @@ using namespace paddle::framework; // NOLINT
 void BindParallelExecutor(pybind11::module &m) { // NOLINT
   // -- python binds for parallel executor.
   py::class_<ParallelExecutor> pe(m, "ParallelExecutor");
-  py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy", R"DOC(
-    ExecutionStrategy allows the user to more preciously control how to run
-    the program in ParallelExecutor by setting the property.
-
-    Returns:
-        ExecutionStrategy: An ExecutionStrategy object.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle
-            import paddle.static as static
-            import paddle.nn.functional as F
-
-            paddle.enable_static()
-
-            x = static.data(name='x', shape=[None, 13], dtype='float32')
-            y = static.data(name='y', shape=[None, 1], dtype='float32')
-            y_predict = static.nn.fc(input=x, size=1, act=None)
-
-            cost = F.square_error_cost(input=y_predict, label=y)
-            avg_loss = paddle.mean(cost)
-
-            sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001)
-            sgd_optimizer.minimize(avg_loss)
-
-            exec_strategy = static.ExecutionStrategy()
-            exec_strategy.num_threads = 4
-
-            train_exe = static.ParallelExecutor(use_cuda=False,
-                                                loss_name=avg_loss.name,
-                                                exec_strategy=exec_strategy)
-        )DOC");
+  py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy");
 
   py::enum_<paddle::platform::DeviceType>(m, "DeviceType", py::arithmetic())
       .value("CPU", paddle::platform::DeviceType::CPU)
@@ -233,29 +201,7 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
           [](const ExecutionStrategy &self) { return self.num_threads_; },
           [](ExecutionStrategy &self, size_t num_threads) {
             self.num_threads_ = num_threads;
-          },
-          R"DOC(
-            The type is INT, num_threads represents the size of thread pool that
-            used to run the operators of the current program in ParallelExecutor.
-            If :math:`num\_threads=1`, all the operators will execute one by one,
-            but the order maybe difference between iterations.
-            If it is not set, it will be set in ParallelExecutor according to the
-            device type and device count, for GPU, :math:`num\_threads=device\_count*4`, for CPU,
-            :math:`num\_threads=CPU\_NUM*4`, the explanation of:math:`CPU\_NUM` is in ParallelExecutor.
-            if it is not set, ParallelExecutor will get the cpu count by calling
-            `multiprocessing.cpu_count()`. Default 0.
-
-            Examples:
-                .. code-block:: python
-
-                    import paddle
-                    import paddle.static as static
-
-                    paddle.enable_static()
-
-                    exec_strategy = static.ExecutionStrategy()
-                    exec_strategy.num_threads = 4
-            )DOC")
+          })
       .def_property(
           "_use_device",
           [](const ExecutionStrategy &self) { return self.use_device_; },
@@ -268,72 +214,29 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
           [](const ExecutionStrategy &self) { return self.allow_op_delay_; },
           [](ExecutionStrategy &self, bool allow_op_delay) {
             self.allow_op_delay_ = allow_op_delay;
-          },
-          R"DOC(The type is BOOL, allow_op_delay represents whether to delay the
-                communication operators to run, it may make the execution faster.
-                Note that this option is invalid now, and it will be removed in
-                next version. Default False.)DOC")
+          })
       .def_property(
           "num_iteration_per_drop_scope",
          [](const ExecutionStrategy &self) {
            return self.num_iteration_per_drop_scope_;
          },
          [](ExecutionStrategy &self, size_t num_iteration_per_drop_scope) {
            self.num_iteration_per_drop_scope_ = num_iteration_per_drop_scope;
-          },
-          R"DOC(The type is INT, num_iteration_per_drop_scope indicates how
-            many iterations to clean up the temp variables which
-            is generated during execution. It may make the execution faster,
-            because the temp variable's shape maybe the same between two iterations.
-            Default 100.
-
-            .. note::
-                1. If you fetch data when calling the 'run', the ParallelExecutor
-                will clean up the temp variables at the end of the current iteration.
-                2. In some NLP model, it may cause the GPU memory is insufficient,
-                in this case, you should reduce `num_iteration_per_drop_scope`.
-
-            Examples:
-                .. code-block:: python
-
-                    import paddle
-                    import paddle.static as static
-
-                    paddle.enable_static()
-
-                    exec_strategy = static.ExecutionStrategy()
-                    exec_strategy.num_iteration_per_drop_scope = 10
-            )DOC")
+          })
       .def_property(
           "num_iteration_per_run",
          [](const ExecutionStrategy &self) {
            return self.num_iteration_per_run_;
          },
          [](ExecutionStrategy &self, size_t num_iteration_per_run) {
            self.num_iteration_per_run_ = num_iteration_per_run;
-          },
-          R"DOC(This config that how many iteration the executor will run when
-            user call exe.run() in python。Default: 1.
-
-            Examples:
-                .. code-block:: python
-
-                    import paddle
-                    import paddle.static as static
-
-                    paddle.enable_static()
-
-                    exec_strategy = static.ExecutionStrategy()
-                    exec_strategy.num_iteration_per_run = 10
-            )DOC")
+          })
       .def_property(
           "use_thread_barrier",
          [](const ExecutionStrategy &self) { return self.thread_barrier_; },
          [](ExecutionStrategy &self, bool use_thread_barrier) {
            self.thread_barrier_ = use_thread_barrier;
-          },
-          R"DOC(This config that the this is distributed training with parameter server
-            )DOC")
+          })
       .def_property(
           "_dry_run",
           [](const ExecutionStrategy &self) { return self.dry_run_; },
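
For reference, the deleted docstrings documented ExecutionStrategy with the usage sketch below. It is assembled from the removed DOC text above and relies on the legacy static-graph API (static.nn.fc(input=..., act=...), static.ParallelExecutor), so treat it as a historical example rather than code guaranteed to run on current Paddle releases:

    import paddle
    import paddle.static as static
    import paddle.nn.functional as F

    paddle.enable_static()

    # Build a small static-graph linear-regression program.
    x = static.data(name='x', shape=[None, 13], dtype='float32')
    y = static.data(name='y', shape=[None, 1], dtype='float32')
    y_predict = static.nn.fc(input=x, size=1, act=None)  # legacy fc signature

    cost = F.square_error_cost(input=y_predict, label=y)
    avg_loss = paddle.mean(cost)

    sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_loss)

    # Properties whose DOC strings this PR removes:
    exec_strategy = static.ExecutionStrategy()
    exec_strategy.num_threads = 4                    # default 0: derived from the device
    exec_strategy.num_iteration_per_drop_scope = 10  # default 100: clean temp vars every N iterations
    exec_strategy.num_iteration_per_run = 10         # default 1: iterations per exe.run() call

    train_exe = static.ParallelExecutor(use_cuda=False,
                                        loss_name=avg_loss.name,
                                        exec_strategy=exec_strategy)

Per the removed text, a num_threads of 0 is resolved at runtime to device_count * 4 on GPU and CPU_NUM * 4 on CPU, where CPU_NUM falls back to multiprocessing.cpu_count() when unset; allow_op_delay is already a no-op slated for removal, and use_thread_barrier applies only to distributed training with a parameter server.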