Marker op for profiling #33034

Merged · 3 commits · May 26, 2021
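This PR adds a no-op marker operator for profiling: each marker emits a profiler annotation of the form marker_<role>_<pos>, so the beginning/end of a forward or backward stage becomes visible in Paddle's profiler trace. The sketch below is an editor's illustration, not part of this PR's diff; it assumes the standard static-graph Block.append_op API, with the attribute names and values (marker_role, marker_pos) taken from the operator definition that follows.

# Hypothetical usage sketch: wrap a forward stage with begin/end markers.
import paddle
import paddle.fluid as fluid

paddle.enable_static()

prog = fluid.Program()
block = prog.global_block()

# "B" marks the beginning of the forward duration.
block.append_op(type='marker',
                attrs={'marker_role': 'forward', 'marker_pos': 'B'})
# ... the ops of the forward stage would be appended here ...
# "E" marks the end of the same duration.
block.append_op(type='marker',
                attrs={'marker_role': 'forward', 'marker_pos': 'E'})

exe = fluid.Executor(fluid.CPUPlace())
exe.run(prog)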
76 changes: 76 additions & 0 deletions paddle/fluid/operators/marker_op.cc
@@ -0,0 +1,76 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/profiler.h"

namespace paddle {
namespace operators {

class MarkerOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
std::string marker_role = ctx->Attrs().Get<std::string>("marker_role");
std::string marker_pos = ctx->Attrs().Get<std::string>("marker_pos");

    VLOG(3) << "The role is: " << marker_role << "; "
            << "the position is: " << marker_pos << ".";
}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.GetPlace());
}
};

class MarkerOpMaker : public framework::OpProtoAndCheckerMaker {
public:
  void Make() override {
    AddAttr<std::string>("marker_role",
                         "(string, default forward) forward or backward, "
                         "marks different stages of the process.")
        .SetDefault("forward");
    AddAttr<std::string>(
        "marker_pos",
        "(string, default B) the position where the marker is placed; "
        "B stands for the beginning of the duration, "
        "E stands for the end of the duration.")
        .SetDefault("B");
AddComment(
R"DOC(Marker Operator - Add marker at the beginning/end of a forward/backward process.)DOC");
}
};

template <typename T>
class MarkerOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto marker_role = ctx.Attr<std::string>("marker_role");
auto marker_pos = ctx.Attr<std::string>("marker_pos");

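    // RecordEvent is RAII-scoped: the marker spans from here to the end of
    // Compute(), carrying "marker_<role>_<pos>" as its attr.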
platform::RecordEvent record_event(
"MarkerCPU", platform::EventRole::kInnerOp,
"marker_" + marker_role + "_" + marker_pos);
}
};
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_WITHOUT_GRADIENT(marker, ops::MarkerOp, ops::MarkerOpMaker);
REGISTER_OP_CPU_KERNEL(marker, ops::MarkerOpCPUKernel<float>);
61 changes: 61 additions & 0 deletions paddle/fluid/operators/marker_op.cu
@@ -0,0 +1,61 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/profiler.h"

namespace paddle {
namespace operators {

template <typename T>
__global__ void SimpleMarkerKernel(T* in, T* out, int ndim) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
for (; idx < ndim; idx += blockDim.x * gridDim.x) {
out[idx] = in[idx];
}
}

template <typename T>
class MarkerOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();

auto marker_role = ctx.Attr<std::string>("marker_role");
auto marker_pos = ctx.Attr<std::string>("marker_pos");
VLOG(3) << "marker role: " << marker_role
<< " marker position: " << marker_pos;

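    // A tiny 32-element copy kernel gives the profiler a GPU-side activity
    // to correlate with this marker on the CUDA timeline.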
framework::Tensor A;
framework::Tensor B;
auto* in_temp = A.mutable_data<T>({32, 1}, ctx.GetPlace());
auto* out_temp = B.mutable_data<T>({32, 1}, ctx.GetPlace());
platform::RecordEvent record_event(
"MarkerCUDA", platform::EventRole::kInnerOp,
"marker_" + marker_role + "_" + marker_pos);
SimpleMarkerKernel<T><<<1, 32, 0, dev_ctx.stream()>>>(in_temp, out_temp,
32);
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(marker, ops::MarkerOpCUDAKernel<float>);
2 changes: 1 addition & 1 deletion paddle/fluid/platform/device_tracer.cc
@@ -511,7 +511,7 @@ class DeviceTracerImpl : public DeviceTracer {
auto c = correlations_.find(r.correlation_id);
if (c != correlations_.end() && c->second != nullptr) {
event->set_name(c->second->name());
-        event->set_detail_info(r.name);
+        event->set_detail_info(c->second->attr());
find++;
} else {
VLOG(10) << "Missing Kernel Event: " + r.name;
5 changes: 3 additions & 2 deletions paddle/fluid/platform/event.h
@@ -40,7 +40,7 @@ class Event {
// The DeviceContext is used to get the cuda stream.
// If CPU profiling mode, can pass nullptr.
Event(EventType type, std::string name, uint32_t thread_id,
-        EventRole role = EventRole::kOrdinary);
+        EventRole role = EventRole::kOrdinary, std::string attr = "none");

const EventType& type() const;
Event* parent() const { return parent_; }
@@ -50,7 +50,7 @@
uint32_t thread_id() const { return thread_id_; }
void set_name(std::string name) { name_ = name; }
void set_role(EventRole role) { role_ = role; }
-
+  std::string attr() const { return attr_; }
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#ifndef PADDLE_WITH_CUPTI
gpuEvent_t event() const { return event_; }
@@ -69,6 +69,7 @@
EventRole role_{};
int64_t cpu_ns_;
bool visited_status_{false};
+  std::string attr_;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#ifdef PADDLE_WITH_CUPTI
int64_t gpu_ns_ = 0;
23 changes: 15 additions & 8 deletions paddle/fluid/platform/profiler.cc
@@ -32,8 +32,12 @@ namespace platform {
MemEvenRecorder MemEvenRecorder::recorder;

Event::Event(EventType type, std::string name, uint32_t thread_id,
-             EventRole role)
-    : type_(type), name_(name), thread_id_(thread_id), role_(role) {
+             EventRole role, std::string attr)
+    : type_(type),
+      name_(name),
+      thread_id_(thread_id),
+      role_(role),
+      attr_(attr) {
cpu_ns_ = GetTimeInNsec();
}

@@ -52,7 +56,8 @@ double Event::CudaElapsedMs(const Event &e) const {
#endif
}

-RecordEvent::RecordEvent(const std::string &name, const EventRole role) {
+RecordEvent::RecordEvent(const std::string &name, const EventRole role,
+                         const std::string attr) {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) {
@@ -69,7 +74,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role) {
is_enabled_ = true;
// lock is not needed, the code below is thread-safe
// Maybe need the same push/pop behavior.
-  Event *e = PushEvent(name, role);
+  Event *e = PushEvent(name, role, attr);
SetCurAnnotation(e);
name_ = e->name();
}
@@ -186,12 +191,14 @@ void Mark(const std::string &name) {
GetEventList().Record(EventType::kMark, name, g_thread_id);
}

-Event *PushEvent(const std::string &name, const EventRole role) {
-  return GetEventList().Record(EventType::kPushRange, name, g_thread_id, role);
+Event *PushEvent(const std::string &name, const EventRole role,
+                 std::string attr) {
+  return GetEventList().Record(EventType::kPushRange, name, g_thread_id, role,
+                               attr);
}

-void PopEvent(const std::string &name, const EventRole role) {
-  GetEventList().Record(EventType::kPopRange, name, g_thread_id, role);
+void PopEvent(const std::string &name, const EventRole role, std::string attr) {
+  GetEventList().Record(EventType::kPopRange, name, g_thread_id, role, attr);
}
void EnableProfiler(ProfilerState state) {
PADDLE_ENFORCE_NE(state, ProfilerState::kDisabled,
9 changes: 6 additions & 3 deletions paddle/fluid/platform/profiler.h
@@ -126,7 +126,8 @@ struct MemEvenRecorder {

struct RecordEvent {
RecordEvent(const std::string& name,
-              const EventRole role = EventRole::kOrdinary);
+              const EventRole role = EventRole::kOrdinary,
+              const std::string attr = "none");

~RecordEvent();

@@ -200,8 +201,10 @@ void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place, const std::string& annotation);
void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place, const std::string& annotation);
-Event* PushEvent(const std::string& name, const EventRole role);
-void PopEvent(const std::string& name, const EventRole role);
+Event* PushEvent(const std::string& name, const EventRole role,
+                 const std::string attr = "none");
+void PopEvent(const std::string& name, const EventRole role,
+              const std::string attr = "none");
// Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std::vector<std::vector<Event>> GetAllEvents();
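Taken together, the event.h/profiler.cc/profiler.h changes thread a free-form attr string from RecordEvent through PushEvent/PopEvent into each Event, and the device_tracer.cc change above copies that attr into the trace's detail_info field. A minimal C++ sketch of the resulting call pattern (the enclosing scope and names are hypothetical; the three-argument signature comes from this diff):

{
  // RecordEvent is RAII: the event spans this scope, and the third
  // argument is the new attr that ends up in detail_info.
  platform::RecordEvent guard("my_region", platform::EventRole::kInnerOp,
                              "marker_forward_B");
  // ... work to be profiled ...
}  // ~RecordEvent pops the event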
36 changes: 36 additions & 0 deletions python/paddle/fluid/tests/unittests/test_marker_op.py
@@ -0,0 +1,36 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
from paddle.distributed.fleet.meta_optimizers.common import OpRole


class TestMarkerOp(OpTest):
def setUp(self):
self.op_type = "marker"
self.inputs = {}
self.attrs = {
'marker_role': 'forward',
'marker_pos': 'B',
'op_role': OpRole.Forward
}
self.outputs = {}

def test_check_output(self):
self.check_output()


if __name__ == "__main__":
unittest.main()
1 change: 1 addition & 0 deletions tools/static_mode_white_list.py
@@ -707,4 +707,5 @@
'test_lamb_op_xpu',
'test_model_cast_to_bf16',
'test_sgd_op_bf16',
+  'test_marker_op',
]