Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[XPU] support xpu runtime profiler: follow up #54690

Merged
merged 2 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ cc_library(
if(WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda)
endif()
# XPU builds: link the dynload_xpti library into var_type_traits.
if(WITH_XPU)
target_link_libraries(var_type_traits dynload_xpti)
endif()

# every source file that includes "dnnl.h" must depends on mkldnn
# or, the first one should depends on mkldnn
Expand Down
15 changes: 15 additions & 0 deletions paddle/fluid/platform/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ if(WITH_GPU OR WITH_ROCM)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
endif()

# XPU builds: set XPU_CTX_DEPS to the XPTI loader wrapper plus the generic
# dynamic_loader.
if(WITH_XPU)
set(XPU_CTX_DEPS dynload_xpti dynamic_loader)
endif()

if(WITH_IPU)
set(IPU_CTX_DEPS ipu_info)
else()
Expand Down Expand Up @@ -277,6 +281,17 @@ elseif(WITH_ROCM)
stats
op_proto_maker
shape_inference)
elseif(WITH_XPU)
# Profiler library for the WITH_XPU branch; built from profiler.cc and
# depends on dynload_xpti in addition to new_profiler and the common deps.
cc_library(
profiler
SRCS profiler.cc
DEPS phi
enforce
dynload_xpti
new_profiler
stats
op_proto_maker
shape_inference)
else()
cc_library(
profiler
Expand Down
16 changes: 14 additions & 2 deletions paddle/fluid/platform/device/xpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,23 @@ set(XPU_CTX_DEPS
cc_library(
xpu_info
SRCS xpu_info.cc
DEPS glog enforce xpulib device_context place phi)
DEPS glog
enforce
xpulib
device_context
place
phi
dynload_xpti)
cc_library(
xpu_op_list
SRCS xpu_op_list.cc
DEPS glog enforce xpulib device_context op_kernel_type phi)
DEPS glog
enforce
xpulib
device_context
op_kernel_type
phi
dynload_xpti)
cc_library(
xpu_resource_pool
SRCS xpu_resource_pool.cc
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/platform/dynload/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ else()
SRCS warpctc.cc
DEPS dynamic_loader warpctc phi)
endif()
# XPU builds: dynload_xpti is built from xpti.cc and depends on
# dynamic_loader and phi_dynload_xpti.
if(WITH_XPU)
cc_library(
dynload_xpti
SRCS xpti.cc
DEPS dynamic_loader phi_dynload_xpti)
endif()

# TODO(TJ): add iomp, mkldnn?

Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/platform/dynload/dynamic_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ void* GetCusparseLtDsoHandle() {
return phi::dynload::GetCusparseLtDsoHandle();
}

// Forwards to phi::dynload::GetXPTIDsoHandle() and returns its result
// unchanged.
void* GetXPTIDsoHandle() {
  void* xpti_handle = phi::dynload::GetXPTIDsoHandle();
  return xpti_handle;
}

} // namespace dynload
} // namespace platform
} // namespace paddle
1 change: 1 addition & 0 deletions paddle/fluid/platform/dynload/dynamic_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ void* GetCUFFTDsoHandle();
void* GetMKLRTDsoHandle();
void* GetROCFFTDsoHandle();
void* GetCusparseLtDsoHandle();
void* GetXPTIDsoHandle();

void SetPaddleLibPath(const std::string&);
} // namespace dynload
Expand Down
11 changes: 10 additions & 1 deletion paddle/fluid/platform/profiler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ cc_library(
cuda_tracer
SRCS cuda_tracer.cc cupti_data_process.cc
DEPS workqueue_utils enforce glog)
# Tracer library built from xpu_tracer.cc; depends on enforce and glog.
cc_library(
xpu_tracer
SRCS xpu_tracer.cc
DEPS enforce glog)
add_subdirectory(custom_device)
cc_library(
event_node
Expand All @@ -32,7 +36,12 @@ cc_library(
cc_library(
new_profiler
SRCS profiler.cc
DEPS host_tracer cuda_tracer profiler_utils cpu_utilization event_bind
DEPS host_tracer
cuda_tracer
xpu_tracer
profiler_utils
cpu_utilization
event_bind
custom_tracer)
cc_test(
test_event_node
Expand Down
16 changes: 16 additions & 0 deletions paddle/fluid/platform/profiler/profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "paddle/fluid/platform/profiler/host_tracer.h"
#include "paddle/fluid/platform/profiler/trace_event_collector.h"
#include "paddle/fluid/platform/profiler/utils.h"
#include "paddle/fluid/platform/profiler/xpu_tracer.h"
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/backends/device_manager.h"
#endif
Expand All @@ -53,6 +54,10 @@ void SynchronizeDevice() {
phi::DeviceManager::SynchronizeDevice(place);
}
#endif
#ifdef PADDLE_WITH_XPU
// TODO(zhangxiaoci): XPU does not support device sync yet;
// KL3 might support it.
#endif
}

std::atomic<bool> Profiler::alive_{false};
Expand Down Expand Up @@ -82,6 +87,14 @@ bool Profiler::IsCnpapiSupported() {
return supported;
}

// Returns true when this translation unit was compiled with
// PADDLE_WITH_XPTI defined, and false otherwise.
bool Profiler::IsXPTISupported() {
#ifdef PADDLE_WITH_XPTI
  return true;
#else
  return false;
#endif
}

Profiler::Profiler(const ProfilerOptions& options,
const std::vector<std::string>& custom_device_types) {
options_ = options;
Expand All @@ -99,6 +112,9 @@ Profiler::Profiler(const ProfilerOptions& options,
tracers_.emplace_back(&CustomTracer::GetInstance(dev_type), false);
}
}
if (trace_switch.test(kProfileXPUOptionBit)) {
tracers_.emplace_back(&XPUTracer::GetInstance(), false);
}
}

Profiler::~Profiler() { alive_.store(false); }
Expand Down
5 changes: 4 additions & 1 deletion paddle/fluid/platform/profiler/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@ namespace platform {

static constexpr uint32_t kProfileCPUOptionBit = 0;
static constexpr uint32_t kProfileGPUOptionBit = 1;
static constexpr uint32_t kProfileXPUOptionBit = 2;
static constexpr uint32_t kProfileCustomDeviceOptionBit = 3;

void SynchronizeDevice();

struct ProfilerOptions {
uint32_t trace_switch = 0; // bit 0: cpu, bit 1: gpu
uint32_t trace_switch = 0; // bit 0: cpu, bit 1: gpu, bit 2: xpu
uint32_t trace_level = FLAGS_host_trace_level;
};

Expand All @@ -57,6 +58,8 @@ class Profiler {

static bool IsCnpapiSupported();

static bool IsXPTISupported();

void Prepare();

void Start();
Expand Down
13 changes: 8 additions & 5 deletions paddle/fluid/platform/profiler/xpu_tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include "paddle/fluid/platform/profiler/xpu_tracer.h"

Expand All @@ -23,6 +24,7 @@
#include "paddle/phi/backends/device_manager.h"
#endif

#ifdef PADDLE_WITH_XPTI
#define XPTI_CALL(call) \
do { \
XPTIResult _status = call; \
Expand All @@ -31,6 +33,7 @@
exit(-1); \
} \
} while (0)
#endif // PADDLE_WITH_XPTI

namespace paddle {
namespace platform {
Expand All @@ -40,7 +43,7 @@ void XPUTracer::PrepareTracing() {
state_ == TracerState::UNINITED || state_ == TracerState::STOPED,
true,
platform::errors::PreconditionNotMet("XPUTracer must be UNINITED"));
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPTI
XPTI_CALL(dynload::xptiActivityEnable());
VLOG(3) << "enable xpti activity";
#endif
Expand All @@ -52,7 +55,7 @@ void XPUTracer::StartTracing() {
state_ == TracerState::READY,
true,
platform::errors::PreconditionNotMet("Tracer must be READY or STOPPED"));
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPTI
XPTI_CALL(dynload::xptiStartTracing());
#endif
tracing_start_ns_ = PosixInNsec();
Expand All @@ -64,15 +67,15 @@ void XPUTracer::StopTracing() {
state_,
TracerState::STARTED,
platform::errors::PreconditionNotMet("Tracer must be STARTED"));
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPTI
XPTI_CALL(dynload::xptiStopTracing());
XPTI_CALL(dynload::xptiActivityDisable());
VLOG(3) << "disable xpti activity";
#endif
state_ = TracerState::STOPED;
}

#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPTI
void AddApiRecord(const baidu::xpu::xpti::XPTIEventApi* api,
uint64_t start_ns,
TraceEventCollector* collector) {
Expand Down Expand Up @@ -158,7 +161,7 @@ void XPUTracer::CollectTraceData(TraceEventCollector* collector) {
state_,
TracerState::STOPED,
platform::errors::PreconditionNotMet("Tracer must be STOPED"));
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPTI
XPTI_CALL(dynload::xptiActivityFlushAll());
baidu::xpu::xpti::XPTIEvent* record = nullptr;
while (true) {
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/pybind/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ if(WITH_NCCL OR WITH_RCCL)
set(PYBIND_DEPS ${PYBIND_DEPS} reducer)
endif()

# XPU builds: append dynload_xpti to the pybind dependency list.
if(WITH_XPU)
set(PYBIND_DEPS ${PYBIND_DEPS} dynload_xpti)
endif()

if(WITH_XPU_BKCL)
set(PYBIND_DEPS ${PYBIND_DEPS} reducer)
set(PYBIND_DEPS ${PYBIND_DEPS} bkcl_context)
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/pybind/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2408,6 +2408,7 @@ All parameter, weight, gradient are variables in Paddle.
.def("is_cupti_supported", &paddle::platform::Profiler::IsCuptiSupported)
.def("is_cnpapi_supported",
&paddle::platform::Profiler::IsCnpapiSupported)
.def("is_xpti_supported", &paddle::platform::Profiler::IsXPTISupported)
.def("prepare",
[](paddle::platform::Profiler *profiler) {
platform::EnableHostEventRecorder();
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/backends/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ if(WITH_XPU)
list(APPEND BACKENDS_SRCS xpu/xpu_context.cc xpu/xpu_info.cc)
list(APPEND BACKENDS_SRCS xpu/xpu_op_list.cc xpu/xpu1_op_list.cc
xpu/xpu2_op_list.cc xpu/xpu_l3_strategy.cc)
list(APPEND BACKENDS_DEPS phi_dynload_xpti)
endif()

if(WITH_MKLDNN)
Expand Down
7 changes: 7 additions & 0 deletions paddle/phi/backends/dynload/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ if(WITH_MKLML)
endif()
endif()

# XPU builds: phi_dynload_xpti is built from xpti.cc and depends on phi.
if(WITH_XPU)
cc_library(
phi_dynload_xpti
SRCS xpti.cc
DEPS phi)
endif()

if(WITH_FLASHATTN)
list(APPEND DYNLOAD_COMMON_SRCS flashattn.cc)
endif()
Expand Down
12 changes: 12 additions & 0 deletions paddle/phi/backends/dynload/dynamic_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ PHI_DEFINE_string(rccl_dir,
"dlopen will search rccl from LD_LIBRARY_PATH");
#endif

#ifdef PADDLE_WITH_XPU
// Flag naming the path used when loading libxpti.so; defaults to the empty
// string. Only defined for XPU builds.
DEFINE_string(xpti_dir, "", "Specify path for loading libxpti.so.");
#endif

namespace phi {
namespace dynload {

Expand Down Expand Up @@ -601,5 +605,13 @@ void* GetCusparseLtDsoHandle() {
#endif
}

// Returns the handle obtained by searching FLAGS_xpti_dir for "libxpti.so"
// when PADDLE_WITH_XPTI is defined; returns nullptr otherwise.
void* GetXPTIDsoHandle() {
  void* xpti_handle = nullptr;
#ifdef PADDLE_WITH_XPTI
  xpti_handle = GetDsoHandleFromSearchPath(FLAGS_xpti_dir, "libxpti.so");
#endif
  return xpti_handle;
}

} // namespace dynload
} // namespace phi
1 change: 1 addition & 0 deletions paddle/phi/backends/dynload/dynamic_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ void* GetCUFFTDsoHandle();
void* GetMKLRTDsoHandle();
void* GetROCFFTDsoHandle();
void* GetCusparseLtDsoHandle();
void* GetXPTIDsoHandle();

void SetPaddleLibPath(const std::string&);

Expand Down
15 changes: 13 additions & 2 deletions python/paddle/profiler/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,19 @@ class ProfilerState(Enum):

class ProfilerTarget(Enum):
r"""
ProfilerTarget is used to specify target device for :ref:`profiling <api_paddle_profiler_Profiler>` . Only CPU and GPU are supported currently.
ProfilerTarget is used to specify target device for :ref:`profiling <api_paddle_profiler_Profiler>` . Only CPU, GPU and XPU are supported currently.

The meaning of each ProfilerTarget is as follows

- **ProfilerTarget.CPU** : Profile events on CPU.

- **ProfilerTarget.GPU** : Profile events on GPU.

- **ProfilerTarget.XPU** : Profile events on XPU.
"""
CPU = 0
GPU = 1
XPU = 2
CUSTOM_DEVICE = 3


Expand Down Expand Up @@ -334,6 +337,12 @@ def _get_supported_targets() -> Iterable[ProfilerTarget]:
ProfilerTarget.CPU,
ProfilerTarget.CUSTOM_DEVICE,
]
if _Profiler.is_xpti_supported():
return [
ProfilerTarget.CPU,
ProfilerTarget.XPU,
ProfilerTarget.CUSTOM_DEVICE,
]
return [ProfilerTarget.CPU, ProfilerTarget.CUSTOM_DEVICE]


Expand All @@ -342,7 +351,7 @@ class Profiler:
Profiler context manager, user interface to manage profiling process to start, stop, export profiling data and print summary table.

Args:
targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU <api_paddle_profiler_ProfilerTarget>` and :ref:`ProfilerTarget.GPU <api_paddle_profiler_ProfilerTarget>` .
targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU <api_paddle_profiler_ProfilerTarget>` , :ref:`ProfilerTarget.GPU <api_paddle_profiler_ProfilerTarget>` and :ref:`ProfilerTarget.XPU <api_paddle_profiler_ProfilerTarget>` .
scheduler (Callable|tuple, optional): If it is a callable object, it takes a step number as parameter and return the corresponding :ref:`ProfilerState <api_paddle_profiler_ProfilerState>`. This callable object can be generated by :ref:`make_scheduler <api_paddle_profiler_make_scheduler>` function.
If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch,
which means profiling range [start_batch, end_batch).
Expand Down Expand Up @@ -495,6 +504,8 @@ def __init__(
profileoption.trace_switch |= 1
if ProfilerTarget.GPU in self.targets:
profileoption.trace_switch |= 1 << 1
if ProfilerTarget.XPU in self.targets:
profileoption.trace_switch |= 1 << 2
if ProfilerTarget.CUSTOM_DEVICE in self.targets:
profileoption.trace_switch |= 1 << 3
if not custom_device_types:
Expand Down
4 changes: 4 additions & 0 deletions python/setup.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,10 @@ if '${WITH_XPU_XFT}' == 'ON':
shutil.copy('${XPU_XFT_LIB}', libs_path)
package_data['paddle.libs']+=['${XPU_XFT_LIB_NAME}']

# When the WITH_XPTI build option is ON, copy the XPTI library into libs_path
# and register its file name in the paddle.libs package data.
if '${WITH_XPTI}' == 'ON':
shutil.copy('${XPU_XPTI_LIB}', libs_path)
package_data['paddle.libs']+=['${XPU_XPTI_LIB_NAME}']

# remove unused paddle/libs/__init__.py
if os.path.isfile(libs_path+'/__init__.py'):
os.remove(libs_path+'/__init__.py')
Expand Down
Loading