diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index 1bf6f12e63cbb..16ed7194b6d09 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -183,7 +183,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     platform::RecordEvent record_reorder("ext_reorder",
-                                         platform::EventRole::kUniqueOp);
+                                         platform::TracerEventType::UserDefined,
+                                         2, platform::EventRole::kUniqueOp);
     reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
     astream.wait();
   } else {
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 7ab4e2acecfcc..242bc38cbdeeb 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -32,6 +32,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/kernel_factory.h"
@@ -261,10 +262,12 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
   // TODO(wangchaochaohu): refine code to use only one RecordEvent.
   // In order to record both the op type cost time and the op name cost
   // time, we set two events.
-  platform::RecordEvent op_type_record_event(Type());
+  platform::RecordEvent op_type_record_event(
+      Type().c_str(), platform::TracerEventType::Operator, 1);
   auto op_name = platform::OpName(outputs_, Type());
   platform::RecordEvent op_name_record_event(
-      op_name, platform::EventRole::kUniqueOp);
+      op_name, platform::TracerEventType::Operator, 1,
+      platform::EventRole::kUniqueOp);
   RunImpl(scope, place);
 }
@@ -1253,7 +1256,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   Scope* transfer_scope = nullptr;
   {
     platform::RecordEvent record_event("prepare_data",
-                                       platform::EventRole::kInnerOp);
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     if (need_prepare_data_) {
       transfer_scope = PrepareData(scope, *kernel_type_,
                                    &transfered_inplace_vars, runtime_ctx);
@@ -1265,7 +1269,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   if (!all_kernels_must_compute_runtime_shape_) {
     platform::RecordEvent record_event("infer_shape",
-                                       platform::EventRole::kInnerOp);
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx);
     this->Info().infer_shape_(&infer_shape_ctx);
   }
@@ -1278,7 +1283,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   // not Scope. Imperative mode only pass inputs and get outputs.
   {
     platform::RecordEvent record_event("compute",
-                                       platform::EventRole::kInnerOp);
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     if (run_pten_kernel_) {
       pten::KernelContext pt_kernel_context;
       // Do data transform before building KernelContext
diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc
index c56f82d0bc084..4e86220e154e6 100644
--- a/paddle/fluid/imperative/prepared_operator.cc
+++ b/paddle/fluid/imperative/prepared_operator.cc
@@ -27,7 +27,7 @@
 #endif
 #include "paddle/fluid/framework/library_type.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(benchmark);
@@ -348,16 +348,18 @@ static void PreparedOpRunImpl(
   framework::Scope scope;
 
   {
-    platform::RecordEvent record_event(op.Type() + " infer_shape",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::infer_shape",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     DygraphInferShapeContext<VarType> infer_shape_ctx(
         &ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
     op.Info().infer_shape_(&infer_shape_ctx);
   }
 
   {
-    platform::RecordEvent record_event(op.Type() + " compute",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::compute",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
 
     func(DygraphExecutionContext<VarType>(op, scope, *dev_ctx, ctx, ins, outs,
                                           attrs, default_attrs));
@@ -403,16 +405,18 @@ static void PreparedOpRunPtImpl(
     const framework::AttributeMap& attrs,
     const framework::AttributeMap& default_attrs) {
   {
-    platform::RecordEvent record_event(op.Type() + " infer_shape",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::infer_shape",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     DygraphInferShapeContext<VarType> infer_shape_ctx(
         &ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
     op.Info().infer_shape_(&infer_shape_ctx);
   }
   {
-    platform::RecordEvent record_event(op.Type() + " compute",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::compute",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
 
     PreparePtenData<VarType>(pt_kernel, pt_kernel_signature, ins);
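Note: taken together, the hunks above fix the new calling convention: the event type and trace level now precede the role, and the role keeps its old default. A minimal sketch of an instrumented call site under the new API (the function and event names here are illustrative, not code from this patch):

    #include "paddle/fluid/platform/profiler/event_tracing.h"

    namespace platform = paddle::platform;

    // Hypothetical op run, mirroring OperatorBase::Run/PreparedOpRunImpl above.
    void RunMyOp() {
      // Level-1 event covering the whole operator.
      platform::RecordEvent op_event("my_op",
                                     platform::TracerEventType::Operator, 1);
      {
        // Inner step; kInnerOp marks it as nested inside the op event.
        platform::RecordEvent record_event(
            "my_op::infer_shape", platform::TracerEventType::OperatorInner, 1,
            platform::EventRole::kInnerOp);
        // ... shape inference work ...
      }  // record_event stops here via RAII
    }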
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
index 9305f42021192..fc90eda9f448c 100644
--- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
+++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
@@ -57,8 +57,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
           handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
       auto reorder_p =
           handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, *reorder_src_memory_p,
                          *reorder_dst_memory_p);
       astream.wait();
@@ -73,8 +74,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
           handler.AcquireDstMemory(dy, dout->format(), ctx.GetPlace());
       auto reorder_p = handler.AcquireReorder(reorder_dst_memory_p,
                                               reorder_src_memory_p);
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, *reorder_src_memory_p,
                          *reorder_dst_memory_p);
       astream.wait();
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc
index 642ee1feb7a5d..fe505fe2e51a8 100644
--- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc
+++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc
@@ -57,8 +57,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
           handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
       auto reorder_p =
           handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, *reorder_src_memory_p,
                          *reorder_dst_memory_p);
       astream.wait();
@@ -78,8 +79,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
       reorder_attr.set_output_scales(0, scales);
       auto reorder_p = std::make_shared<dnnl::reorder>(
           *(reorder_src_memory_p), *(reorder_dst_memory_p), reorder_attr);
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, *reorder_src_memory_p,
                          *reorder_dst_memory_p);
       astream.wait();
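Note: every oneDNN reorder in this patch follows the same pattern: a UserDefined event at trace level 2, so reorders can be filtered out when profiling at a coarser level, with kUniqueOp giving each reorder its own trace entry. A sketch of the pattern in isolation (the free function and its parameters are assumptions for illustration):

    #include "dnnl.hpp"
    #include "paddle/fluid/platform/profiler/event_tracing.h"

    namespace platform = paddle::platform;

    void TimedReorder(dnnl::reorder& reorder_p, dnnl::stream& astream,
                      dnnl::memory& src, dnnl::memory& dst) {
      // Level 2 sits one level below operator events (level 1).
      platform::RecordEvent record_reorder(
          "int_reorder", platform::TracerEventType::UserDefined, 2,
          platform::EventRole::kUniqueOp);
      reorder_p.execute(astream, src, dst);
      astream.wait();
    }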
diff --git a/paddle/fluid/operators/marker_op.cc b/paddle/fluid/operators/marker_op.cc
index 397e3bfc6ad26..277a730be9c30 100644
--- a/paddle/fluid/operators/marker_op.cc
+++ b/paddle/fluid/operators/marker_op.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 
 namespace paddle {
 namespace operators {
@@ -63,8 +63,9 @@ class MarkerOpCPUKernel : public framework::OpKernel<T> {
     auto marker_pos = ctx.Attr<std::string>("marker_pos");
 
     platform::RecordEvent record_event(
-        "MarkerCPU", platform::EventRole::kInnerOp,
-        "marker_" + marker_role + "_" + marker_pos);
+        "MarkerCPU", "marker_" + marker_role + "_" + marker_pos,
+        platform::TracerEventType::OperatorInner, 1,
+        platform::EventRole::kInnerOp);
   }
 };
 }  // namespace operators
diff --git a/paddle/fluid/operators/marker_op.cu b/paddle/fluid/operators/marker_op.cu
index b918210389169..cfa5c6dc7a918 100644
--- a/paddle/fluid/operators/marker_op.cu
+++ b/paddle/fluid/operators/marker_op.cu
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 
 namespace paddle {
 namespace operators {
@@ -45,8 +45,9 @@ class MarkerOpCUDAKernel : public framework::OpKernel<T> {
     auto* in_temp = A.mutable_data<T>({32, 1}, ctx.GetPlace());
     auto* out_temp = B.mutable_data<T>({32, 1}, ctx.GetPlace());
 
     platform::RecordEvent record_event(
-        "MarkerCUDA", platform::EventRole::kInnerOp,
-        "marker_" + marker_role + "_" + marker_pos);
+        "MarkerCUDA", "marker_" + marker_role + "_" + marker_pos,
+        platform::TracerEventType::OperatorInner, 1,
+        platform::EventRole::kInnerOp);
     SimpleMarkerKernel<T><<<1, 32, 0, dev_ctx.stream()>>>(in_temp, out_temp, 32);
   }
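Note: the marker kernels use the attr-carrying constructor, whose argument order also changes in this patch: from (name, role, attr, level) to (name, attr, type, level, role). A sketch with hypothetical role/pos values:

    #include <string>
    #include "paddle/fluid/platform/profiler/event_tracing.h"

    namespace platform = paddle::platform;

    void EmitMarker(const std::string& marker_role,
                    const std::string& marker_pos) {
      // The second argument is the free-form attr string; type, level, and
      // role follow it under the new signature.
      platform::RecordEvent record_event(
          "MarkerCPU", "marker_" + marker_role + "_" + marker_pos,
          platform::TracerEventType::OperatorInner, 1,
          platform::EventRole::kInnerOp);
    }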
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index c5215751c8325..5774c3a16766a 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -976,8 +976,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
             handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p);
         {
-          platform::RecordEvent record_reorder("int_reorder",
-                                               platform::EventRole::kUniqueOp);
+          platform::RecordEvent record_reorder(
+              "int_reorder", platform::TracerEventType::UserDefined, 2,
+              platform::EventRole::kUniqueOp);
           reorder_p->execute(astream, *diff_weights_memory_p,
                              *reorder_dst_memory_p);
           astream.wait();
diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
index 4a3d1f455bd26..1d565839fc4ed 100644
--- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
@@ -264,8 +264,9 @@ class ConvTransposeMKLDNNHandlerT
       dev_ctx.SetBlob(key_reorder_p, reorder_p);
 
       auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
                                    {DNNL_ARG_TO, *target_memory_p}});
       astream.wait();
@@ -286,8 +287,9 @@ class ConvTransposeMKLDNNHandlerT
     auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
         dev_ctx.GetBlob(key_reorder_p));
     if (reorder_p != nullptr) {
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
                                    {DNNL_ARG_TO, *target_memory_p}});
       astream.wait();
diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
index 153b0be6dad8f..7296a91f30d6f 100644
--- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -284,8 +284,9 @@ class FCPrimitiveFactory {
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     {
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder.execute(astream, src_mem, *dst_mem);
       astream.wait();
     }
@@ -312,8 +313,9 @@ class FCPrimitiveFactory {
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     {
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder.execute(astream,
                       {{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}});
       astream.wait();
diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
index bc2dbf5696813..31c41f89d8a06 100644
--- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
@@ -116,8 +116,9 @@ class MulPrimitiveFactory {
     auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
     {
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder.execute(astream, src_mem, dst_mem);
       astream.wait();
     }
@@ -277,8 +278,9 @@ class MulPrimitiveFactory {
     auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
     {
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder.execute(astream, src_mem, dst_mem);
       astream.wait();
     }
diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h
index 9dbfe7013fae8..7515d810e0b62 100644
--- a/paddle/fluid/platform/mkldnn_helper.h
+++ b/paddle/fluid/platform/mkldnn_helper.h
@@ -23,7 +23,7 @@ limitations under the License. */
 #include "dnnl.hpp"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/place.h"
-#include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 namespace paddle {
 #ifdef PADDLE_WITH_MKLDNN
 using MKLDNNMemoryFormat = dnnl::memory::format_tag;
@@ -190,7 +190,8 @@ inline void Reorder(dnnl::memory src, dnnl::memory dst,
   auto reorder_prim = dnnl::reorder(src, dst);
   auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
   platform::RecordEvent record_reorder("int_reorder",
-                                       platform::EventRole::kUniqueOp);
+                                       platform::TracerEventType::UserDefined,
+                                       2, platform::EventRole::kUniqueOp);
   reorder_prim.execute(astream, src, dst);
   astream.wait();
 }
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 8d706263f029c..7b8ca1ca42860 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -197,7 +197,8 @@ class MKLDNNHandlerNoCachingT {
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     platform::RecordEvent record_reorder("int_reorder",
-                                         platform::EventRole::kUniqueOp);
+                                         platform::TracerEventType::UserDefined,
+                                         2, platform::EventRole::kUniqueOp);
     reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
                                  {DNNL_ARG_TO, *target_memory_p}});
     astream.wait();
@@ -221,8 +222,9 @@ class MKLDNNHandlerNoCachingT {
         std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p);
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-    platform::RecordEvent record_reorder("int_reorder",
-                                         platform::EventRole::kUniqueOp);
+    platform::RecordEvent record_reorder(
+        "int_reorder", platform::TracerEventType::UserDefined, 2,
+        platform::EventRole::kUniqueOp);
     reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
                                  {DNNL_ARG_TO, *target_memory_p}});
     astream.wait();
@@ -514,7 +516,8 @@ class MKLDNNHandlerT {
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     platform::RecordEvent record_reorder("int_reorder",
-                                         platform::EventRole::kUniqueOp);
+                                         platform::TracerEventType::UserDefined,
+                                         2, platform::EventRole::kUniqueOp);
     reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
                                  {DNNL_ARG_TO, *target_memory_p}});
     astream.wait();
@@ -558,8 +561,9 @@ class MKLDNNHandlerT {
       dev_ctx_.SetBlob(key_reorder_p, reorder_p);
       auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
                                    {DNNL_ARG_TO, *target_memory_p}});
       astream.wait();
@@ -580,8 +584,9 @@ class MKLDNNHandlerT {
       auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
          dev_ctx_.GetBlob(key_reorder_p));
      if (reorder_p != nullptr) {
-        platform::RecordEvent record_reorder("int_reorder",
-                                             platform::EventRole::kUniqueOp);
+        platform::RecordEvent record_reorder(
+            "int_reorder", platform::TracerEventType::UserDefined, 2,
+            platform::EventRole::kUniqueOp);
        reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
                                     {DNNL_ARG_TO, *target_memory_p}});
        astream.wait();
diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc
index 8fecf444dc41b..866bf3c66aa2a 100644
--- a/paddle/fluid/platform/profiler.cc
+++ b/paddle/fluid/platform/profiler.cc
@@ -66,8 +66,8 @@ double Event::CudaElapsedMs(const Event &e) const {
 #endif
 }
 
-RecordEvent::RecordEvent(const char *name, const EventRole role,
-                         uint32_t level) {
+RecordEvent::RecordEvent(const char *name, const TracerEventType type,
+                         uint32_t level, const EventRole role) {
 #ifndef _WIN32
 #ifdef PADDLE_WITH_CUDA
   if (g_enable_nvprof_hook) {
@@ -86,11 +86,12 @@ RecordEvent::RecordEvent(const char *name, const EventRole role,
   is_enabled_ = true;
   shallow_copy_name_ = name;
   role_ = role;
+  type_ = type;
   start_ns_ = PosixInNsec();
 }
 
-RecordEvent::RecordEvent(const std::string &name, const EventRole role,
-                         uint32_t level) {
+RecordEvent::RecordEvent(const std::string &name, const TracerEventType type,
+                         uint32_t level, const EventRole role) {
 #ifndef _WIN32
 #ifdef PADDLE_WITH_CUDA
   if (g_enable_nvprof_hook) {
@@ -109,11 +110,13 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
   is_enabled_ = true;
   name_ = new std::string(name);
   role_ = role;
+  type_ = type;
   start_ns_ = PosixInNsec();
 }
 
-RecordEvent::RecordEvent(const std::string &name, const EventRole role,
-                         const std::string &attr, uint32_t level) {
+RecordEvent::RecordEvent(const std::string &name, const std::string &attr,
+                         const TracerEventType type, uint32_t level,
+                         const EventRole role) {
 #ifndef _WIN32
 #ifdef PADDLE_WITH_CUDA
   if (g_enable_nvprof_hook) {
@@ -130,6 +133,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
     return;
   }
   is_enabled_ = true;
+  type_ = type;
   name_ = new std::string(name);
   start_ns_ = PosixInNsec();
   attr_ = new std::string(attr);
@@ -164,17 +168,15 @@ void RecordEvent::End() {
   uint64_t end_ns = PosixInNsec();
   if (LIKELY(FLAGS_enable_host_event_recorder_hook && is_enabled_)) {
     if (LIKELY(shallow_copy_name_ != nullptr)) {
-      HostEventRecorder::GetInstance().RecordEvent(shallow_copy_name_,
-                                                   start_ns_, end_ns, role_,
-                                                   TracerEventType::NumTypes);
+      HostEventRecorder::GetInstance().RecordEvent(
+          shallow_copy_name_, start_ns_, end_ns, role_, type_);
     } else if (name_ != nullptr) {
       if (attr_ == nullptr) {
-        HostEventRecorder::GetInstance().RecordEvent(
-            *name_, start_ns_, end_ns, role_, TracerEventType::NumTypes);
+        HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
+                                                     role_, type_);
       } else {
-        HostEventRecorder::GetInstance().RecordEvent(
-            *name_, start_ns_, end_ns, role_, TracerEventType::NumTypes,
-            *attr_);
+        HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
+                                                     role_, type_, *attr_);
         delete attr_;
       }
       delete name_;
@@ -301,7 +303,7 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
 void Mark(const std::string &name) {
   if (FLAGS_enable_host_event_recorder_hook) {
     HostEventRecorder::GetInstance().RecordEvent(
-        name, 0, 0, EventRole::kOrdinary, TracerEventType::NumTypes);
+        name, 0, 0, EventRole::kOrdinary, TracerEventType::UserDefined);
     return;
   }
   GetEventList().Record(EventType::kMark, name, g_thread_id);
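Note: the constructors now store the type, and End() forwards it to HostEventRecorder in place of the old TracerEventType::NumTypes placeholder, so recorded host events finally carry a meaningful type. End() can also be called before the destructor when RAII scoping is inconvenient; a sketch (the surrounding function is hypothetical):

    #include "paddle/fluid/platform/profiler/event_tracing.h"

    namespace platform = paddle::platform;

    void Step() {
      platform::RecordEvent ev("step_prepare",
                               platform::TracerEventType::UserDefined, 2);
      // ... timed section ...
      ev.End();  // stop early; the event is recorded with its real type
      // ... untimed tail work ...
    }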
diff --git a/paddle/fluid/platform/profiler/event_tracing.h b/paddle/fluid/platform/profiler/event_tracing.h
index 2532077bcc3bd..54c5b219310a9 100644
--- a/paddle/fluid/platform/profiler/event_tracing.h
+++ b/paddle/fluid/platform/profiler/event_tracing.h
@@ -21,12 +21,13 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
+static constexpr uint32_t kDefaultTraceLevel = 4;
 // CPU event tracing. A trace marks something that happens but has no duration
 // associated with it. For example, thread starts working.
 // Chrome Trace Viewer Format: Instant Event
 struct RecordInstantEvent {
   explicit RecordInstantEvent(const char* name, TracerEventType type,
-                              uint32_t level = 1);
+                              uint32_t level = kDefaultTraceLevel);
 };
@@ -34,16 +35,21 @@ struct RecordInstantEvent {
 // CPU event tracing. A trace starts when an object of this class is created
 // and stops when the object is destroyed.
 // Chrome Trace Viewer Format: Duration Event/Complete Event
 class RecordEvent {
  public:
-  explicit RecordEvent(const std::string& name,
-                       const EventRole role = EventRole::kOrdinary,
-                       uint32_t level = 1);
+  explicit RecordEvent(
+      const std::string& name,
+      const TracerEventType type = TracerEventType::UserDefined,
+      uint32_t level = kDefaultTraceLevel,
+      const EventRole role = EventRole::kOrdinary);
 
-  explicit RecordEvent(const char* name,
-                       const EventRole role = EventRole::kOrdinary,
-                       uint32_t level = 1);
+  explicit RecordEvent(const char* name, const TracerEventType type =
+                                             TracerEventType::UserDefined,
+                       uint32_t level = kDefaultTraceLevel,
+                       const EventRole role = EventRole::kOrdinary);
 
-  RecordEvent(const std::string& name, const EventRole role,
-              const std::string& attr, uint32_t level = 1);
+  RecordEvent(const std::string& name, const std::string& attr,
+              const TracerEventType type = TracerEventType::UserDefined,
+              uint32_t level = kDefaultTraceLevel,
+              const EventRole role = EventRole::kOrdinary);
 
   // Stop event tracing explicitly before the object goes out of scope.
   // Sometimes it's inconvenient to use RAII
@@ -65,6 +71,7 @@ class RecordEvent {
   // different kernel invocations within an op.
   // std::string full_name_;
   EventRole role_{EventRole::kOrdinary};
+  TracerEventType type_{TracerEventType::UserDefined};
   std::string* attr_{nullptr};
   bool finished_{false};
 };
diff --git a/paddle/fluid/platform/profiler/trace_event.h b/paddle/fluid/platform/profiler/trace_event.h
index 3e4903f6ffb64..61f96218560ec 100644
--- a/paddle/fluid/platform/profiler/trace_event.h
+++ b/paddle/fluid/platform/profiler/trace_event.h
@@ -36,6 +36,18 @@ enum class TracerEventType {
   Memset = 6,
   // Used to mark record defined by user
   UserDefined = 7,
+  // Used to mark operator detail (such as infer_shape, compute)
+  OperatorInner = 8,
+  // Used to mark the forward process in model training or testing
+  Forward = 9,
+  // Used to mark the backward process in model training
+  Backward = 10,
+  // Used to mark the optimization process in model training
+  Optimization = 11,
+  // Used to mark communication in distributed training
+  Communication = 12,
+  // Used to mark Python-level ops
+  PythonOp = 13,
   // A flag to denote the number of current types
   NumTypes
 };
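Note: with the new defaults in event_tracing.h, user code that passes only a name gets type = UserDefined, level = kDefaultTraceLevel (4), and role = kOrdinary, while the enum values added above let framework code label spans by training phase. A sketch of both (the function and event names are illustrative):

    #include "paddle/fluid/platform/profiler/event_tracing.h"

    namespace platform = paddle::platform;

    void TrainIteration() {
      // All defaults: UserDefined, kDefaultTraceLevel, kOrdinary.
      platform::RecordEvent iter("iteration");

      // Explicitly typed span for the forward pass of this iteration.
      platform::RecordEvent fwd("forward", platform::TracerEventType::Forward,
                                1);
      // ... forward pass ...
    }  // both events end here via RAII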