Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add profiler features #40357

Merged
merged 49 commits into from
Mar 23, 2022
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
64ba8d8
add event record for model profiling
rainyfly Mar 8, 2022
e5e7979
fix format
rainyfly Mar 8, 2022
1ad9fae
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
rainyfly Mar 8, 2022
3e8fe2c
fix format
rainyfly Mar 8, 2022
c5f2e6f
fix code example bug
rainyfly Mar 9, 2022
0edc2d9
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
rainyfly Mar 9, 2022
c9e5085
no
rainyfly Mar 9, 2022
10f9fb8
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
rainyfly Mar 9, 2022
048428a
add profiler statistic
rainyfly Mar 9, 2022
0d464b6
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
rainyfly Mar 9, 2022
c8037aa
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
rainyfly Mar 9, 2022
ba3d2dc
add profiler feature
rainyfly Mar 9, 2022
850949c
fix bug
rainyfly Mar 9, 2022
5e3d10a
fix bug
rainyfly Mar 9, 2022
610cf8b
fix bug
rainyfly Mar 9, 2022
dd0c1b1
fix bug
rainyfly Mar 9, 2022
145e3e0
required: gpu
rainyfly Mar 9, 2022
e3cca40
required: gpu
rainyfly Mar 9, 2022
b67db4b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
rainyfly Mar 9, 2022
a0ff535
fix bug
rainyfly Mar 9, 2022
bcc0b6d
required: gpu
rainyfly Mar 9, 2022
358ad4f
fix ci bug
rainyfly Mar 10, 2022
18b44aa
fix ci error
rainyfly Mar 10, 2022
9c9718b
fix ci error
rainyfly Mar 10, 2022
3cd6277
Merge branch 'add_profiler_record_for_model' into add_profiler_features
rainyfly Mar 10, 2022
91bec51
upgrade document
rainyfly Mar 10, 2022
a67da4a
fix doc
rainyfly Mar 10, 2022
7922042
fix ci bug
rainyfly Mar 10, 2022
87b74d2
add doc and fix bug
rainyfly Mar 11, 2022
020eaf3
nothing
rainyfly Mar 11, 2022
e9760bb
fix bug
rainyfly Mar 11, 2022
470b4cf
fix format bug
rainyfly Mar 11, 2022
8a80c00
modify format
rainyfly Mar 14, 2022
5058279
add deprecated description for old profiler
rainyfly Mar 16, 2022
8b3692e
fix bug
rainyfly Mar 16, 2022
45bebd7
fix bug
rainyfly Mar 16, 2022
a7d8b03
fix
rainyfly Mar 16, 2022
fc3a199
add load_profiler_reuslt doc
rainyfly Mar 17, 2022
2acdb9d
add load_profiler_reuslt doc
rainyfly Mar 17, 2022
78c58ac
add load_profiler_reuslt doc
rainyfly Mar 17, 2022
73fad08
help fix old profiler sample code
rainyfly Mar 17, 2022
88b9479
add api doc
rainyfly Mar 21, 2022
b62a430
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
rainyfly Mar 21, 2022
037e014
fix format
rainyfly Mar 21, 2022
4e1910a
fix api doc
rainyfly Mar 21, 2022
90aaa68
fix api doc format
rainyfly Mar 21, 2022
7b7a9c0
fix api doc format
rainyfly Mar 21, 2022
c7b2f5c
fix api doc c format
rainyfly Mar 22, 2022
0a301f1
fix api doc format
rainyfly Mar 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions paddle/fluid/platform/profiler/cpu_utilization.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,19 @@ float CpuUtilization::GetCpuUtilization() {
float busy_time = (system_kernel_time_end - system_kernel_time_start) +
(system_user_time_end - system_user_time_start);
float idle_time = system_idle_time_end - system_idle_time_start;
cpu_utilization = busy_time / (busy_time + idle_time);

if (busy_time + idle_time != 0) {
cpu_utilization = busy_time / (busy_time + idle_time);
}
#elif defined(__linux__)
float busy_time = (system_tms_end_.tms_utime - system_tms_start_.tms_utime) +
(system_tms_end_.tms_stime - system_tms_start_.tms_stime) +
(nice_time_end_ - nice_time_start_) +
(irq_end_ - irq_start_) + (softirq_end_ - softirq_start_) +
(steal_end_ - steal_start_);
float idle_time = (idle_end_ - idle_start_) + (iowait_end_ - iowait_start_);
cpu_utilization = busy_time / (busy_time + idle_time);
if (busy_time + idle_time != 0) {
cpu_utilization = busy_time / (busy_time + idle_time);
}
#else
LOG(WARNING)
<< "Current System is not supported to get system cpu utilization"
Expand All @@ -148,13 +151,16 @@ float CpuUtilization::GetCpuCurProcessUtilization() {
uint64_t end = FileTimeToUint64(end_);
float busy_time = (process_kernel_time_end - process_kernel_time_start) +
(process_user_time_end - process_user_time_start);
cpu_process_utilization = busy_time / (end - start);
LOG(INFO) << "Process Utilization = " << cpu_process_utilization << std::endl;
if (end - start != 0) {
cpu_process_utilization = busy_time / (end - start);
}
#elif defined(__linux__)
float busy_time =
(process_tms_end_.tms_utime - process_tms_start_.tms_utime) +
(process_tms_end_.tms_stime - process_tms_start_.tms_stime);
cpu_process_utilization = busy_time / (end_ - start_);
if (end_ - start_ != 0) {
cpu_process_utilization = busy_time / (end_ - start_);
}
#else
LOG(WARNING)
<< "Current System is not supported to get process cpu utilization"
Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/platform/profiler/profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ std::unique_ptr<Profiler> Profiler::Create(const ProfilerOptions& options) {
return std::unique_ptr<Profiler>(new Profiler(options));
}

// Reports whether this build was compiled with CUPTI support
// (i.e. the PADDLE_WITH_CUPTI macro was defined at compile time).
bool Profiler::IsCuptiSupported() {
#ifdef PADDLE_WITH_CUPTI
  return true;
#else
  return false;
#endif
}

Profiler::Profiler(const ProfilerOptions& options) {
options_ = options;
std::bitset<32> trace_switch(options_.trace_switch);
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/platform/profiler/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class Profiler {
public:
static std::unique_ptr<Profiler> Create(const ProfilerOptions& options);

static bool IsCuptiSupported();

void Prepare();

void Start();
Expand Down
1 change: 0 additions & 1 deletion paddle/fluid/platform/profiler/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ limitations under the License. */

#include "glog/logging.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/dynload/cupti.h"

namespace paddle {
namespace platform {
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/platform/profiler/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ limitations under the License. */

#include <ctime>
#include <string>
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/os_info.h"

Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/pybind/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2958,6 +2958,7 @@ All parameter, weight, gradient are variables in Paddle.
py::class_<paddle::platform::Profiler>(m, "_Profiler")
.def("create", &paddle::platform::Profiler::Create,
py::return_value_policy::take_ownership)
.def("is_cupti_supported", &paddle::platform::Profiler::IsCuptiSupported)
.def("prepare",
[](paddle::platform::Profiler *profiler) {
platform::EnableHostEventRecorder();
Expand Down
13 changes: 13 additions & 0 deletions python/paddle/fluid/dataloader/dataloader_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import queue

import paddle
import paddle.profiler as profiler
from .. import core, layers
from ..framework import in_dygraph_mode, _in_eager_mode
from ..multiprocess_utils import _set_SIGCHLD_handler, MP_STATUS_CHECK_INTERVAL, CleanupFuncRegistrar
Expand Down Expand Up @@ -250,6 +251,10 @@ def _thread_loop(self, legacy_expected_place):
self._exit_thread_expectedly()

def __next__(self):
trace_event = profiler.RecordEvent(
name="_DataLoaderIterSingleProcess",
event_type=profiler.TracerEventType.Dataloader)
trace_event.begin()
try:
if in_dygraph_mode():
if _in_eager_mode():
Expand Down Expand Up @@ -283,6 +288,8 @@ def __next__(self):
self._reader.shutdown()
self._try_shutdown_all()
six.reraise(*sys.exc_info())
finally:
trace_event.end()

def _shutdown_thread(self):
if self._thread:
Expand Down Expand Up @@ -688,6 +695,10 @@ def _shutdown_on_exit(self):
self._try_shutdown_all(1)

def __next__(self):
trace_event = profiler.RecordEvent(
name="_DataLoaderIterMultiProcess",
event_type=profiler.TracerEventType.Dataloader)
trace_event.begin()
try:
# _batches_outstanding here records the total batch data number
# in 'from after _try_put_indices to before output data', this
Expand Down Expand Up @@ -736,6 +747,8 @@ def __next__(self):
self._reader.shutdown()
self._try_shutdown_all()
six.reraise(*sys.exc_info())
finally:
trace_event.end()

# python2 compatibility
def next(self):
Expand Down
5 changes: 4 additions & 1 deletion python/paddle/fluid/dygraph/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import inspect

import paddle
import paddle.profiler as profiler

from . import parallel_helper
from .. import unique_name
Expand Down Expand Up @@ -904,7 +905,9 @@ def _dygraph_call_func(self, *inputs, **kwargs):

self._built = True

outputs = self.forward(*inputs, **kwargs)
with profiler.RecordEvent(self.full_name(),
profiler.TracerEventType.Forward):
outputs = self.forward(*inputs, **kwargs)

for forward_post_hook in self._forward_post_hooks.values():
hook_result = forward_post_hook(self, inputs, outputs)
Expand Down
5 changes: 5 additions & 0 deletions python/paddle/fluid/dygraph/varbase_patch_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .parallel import scale_loss
from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE
import paddle.utils.deprecated as deprecated
import paddle.profiler as profiler


class TensorHookRemoveHelper(object):
Expand Down Expand Up @@ -242,6 +243,9 @@ def backward(self, grad_tensor=None, retain_graph=False):

"""
if framework.in_dygraph_mode():
record_event = profiler.RecordEvent(
"Gradient Backward", profiler.TracerEventType.Backward)
record_event.begin()
if grad_tensor is not None:
if core._in_eager_mode():
assert isinstance(
Expand Down Expand Up @@ -277,6 +281,7 @@ def backward(self, grad_tensor=None, retain_graph=False):
core.dygraph_run_backward([self], [grad_tensor],
retain_graph,
framework._dygraph_tracer())
record_event.end()
else:
raise ValueError(
"Variable.backward() is only available in DyGraph mode")
Expand Down
25 changes: 19 additions & 6 deletions python/paddle/fluid/tests/unittests/test_profiler_statistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,15 @@ def test_statistic_case1(self):
mobilenet_node = HostPythonNode(
'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001)
yolonet_node = HostPythonNode(
'Yolov3Net', profiler.TracerEventType.Forward, 50, 100, 1000, 1001)
'Yolov3Net', profiler.TracerEventType.Forward, 50, 110, 1000, 1001)

userdefined_node = HostPythonNode('Communication Time',
profiler.TracerEventType.UserDefined,
100, 110, 1000, 1001)

communication_node = HostPythonNode(
'Communication', profiler.TracerEventType.Communication, 105, 110,
1000, 1001)
backward_node = HostPythonNode('Gradient Backward',
profiler.TracerEventType.Backward, 120,
200, 1000, 1001)
Expand Down Expand Up @@ -114,7 +122,9 @@ def test_statistic_case1(self):
optimization_node
])
mobilenet_node.children_node.append(conv2d_node)
yolonet_node.children_node.append(sync_batch_norm_node)
yolonet_node.children_node.extend(
[sync_batch_norm_node, userdefined_node])
userdefined_node.children_node.append(communication_node)
conv2d_node.children_node.extend(
[conv2d_infer_shape, conv2d_compute, conv2d_MemCpy])
conv2d_compute.runtime_node.append(conv2d_launchkernel)
Expand Down Expand Up @@ -145,7 +155,7 @@ def test_statistic_case1(self):
profiler.TracerEventType.ProfileStep), 400)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Forward), 90)
profiler.TracerEventType.Forward), 100)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Backward), 80)
Expand All @@ -169,15 +179,18 @@ def test_statistic_case1(self):
0, profiler.TracerEventType.Memcpy), 60)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.UserDefined), 15)
profiler.TracerEventType.UserDefined), 25)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Communication), 5)
self.assertEqual(len(event_summary.items), 2)
self.assertEqual(len(event_summary.userdefined_items), 0)
self.assertEqual(len(event_summary.userdefined_items), 1)
self.assertEqual(len(event_summary.model_perspective_items), 3)
self.assertEqual(len(event_summary.memory_manipulation_items), 1)
self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
self.assertEqual(event_summary.items['conv2d'].gpu_time, 25)
self.assertEqual(
event_summary.model_perspective_items['Forward'].cpu_time, 90)
event_summary.model_perspective_items['Forward'].cpu_time, 100)
self.assertEqual(
event_summary.model_perspective_items['Forward'].gpu_time, 135)
self.assertEqual(
Expand Down
Loading