Skip to content

Commit

Permalink
Fix Warnings After Switching to oneAPI 2024.0 release (#43)
Browse files Browse the repository at this point in the history
* Fix warnings in SYCL code due to icpx/libsycl upgrade.
  * Most of these are due to SYCL 2020 spec deprecations (e.g. `get_pointer` -> `get_multi_ptr`).
* Prevent dlworkload from crashing but provide notice it is not working
  as expected.

Signed-off-by: Schilling, Matthew <matthew.schilling@intel.com>
  • Loading branch information
mschilling0 authored Dec 13, 2023
1 parent 4700b32 commit 166eeba
Show file tree
Hide file tree
Showing 13 changed files with 85 additions and 77 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
**/build
**/__pycache__
build*/
venv/
.vscode/
.vim/
.cache/
compile_commands.json
CMakeUserPresets.json
error_diff.txt
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.49.19
0.49.20
12 changes: 8 additions & 4 deletions samples/dpc_gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,23 @@ if(WIN32)
set(CMAKE_CXX_COMPILER "dpcpp-cl.exe")
set(CMAKE_GENERATOR_TOOLSET "Intel(R) oneAPI DPC++ Compiler")
else()
set(CMAKE_CXX_COMPILER "dpcpp")
set(CMAKE_CXX_COMPILER "icpx")
endif()

project(PTI_Samples_DPC_GEMM CXX)
SetCompilerFlags()
SetBuildType()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gline-tables-only")

add_executable(dpc_gemm main.cc)

target_compile_options(dpc_gemm PUBLIC -fsycl -gline-tables-only)

# target_link_options CMake >= 3.13
set_target_properties(dpc_gemm PROPERTIES LINK_FLAGS "-fsycl -gline-tables-only")

target_include_directories(dpc_gemm
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(CMAKE_INCLUDE_PATH)
target_include_directories(dpc_gemm
PUBLIC "${CMAKE_INCLUDE_PATH}")
endif()
endif()
44 changes: 24 additions & 20 deletions samples/dpc_gemm/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#include "utils.h"

#include <CL/sycl.hpp>
#include <sycl/sycl.hpp>

#define A_VALUE 0.128f
#define B_VALUE 0.256f
Expand Down Expand Up @@ -63,9 +63,12 @@ static float RunAndCheck(sycl::queue queue,

cgh.parallel_for<class __GEMM>(sycl::range<2>(size, size),
[=](sycl::id<2> id) {
GEMM(a_acc.get_pointer(),
b_acc.get_pointer(),
c_acc.get_pointer(),
auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
GEMM(a_acc_ptr.get(),
b_acc_ptr.get(),
c_acc_ptr.get(),
size, id);
});
});
Expand Down Expand Up @@ -101,11 +104,21 @@ static void Compute(sycl::queue queue,
}

int main(int argc, char* argv[]) {
sycl::info::device_type device_type = sycl::info::device_type::gpu;
if (argc > 1 && strcmp(argv[1], "cpu") == 0) {
device_type = sycl::info::device_type::cpu;
} else if (argc > 1 && strcmp(argv[1], "host") == 0) {
device_type = sycl::info::device_type::host;
sycl::device dev;
try {
dev = sycl::device(sycl::gpu_selector_v);
if (argc > 1 && strcmp(argv[1], "cpu") == 0) {
dev = sycl::device(sycl::cpu_selector_v);
} else if (argc > 1 && strcmp(argv[1], "host") == 0) {
dev = sycl::device(sycl::default_selector_v);
}
} catch (const sycl::exception& e) {
std::cerr << "Error: Exception caught while executing SYCL " << e.what() << '\n';
std::cerr << "Unable to select valid sycl device" << '\n';
return EXIT_FAILURE;
} catch (...) {
std::cerr << "Unable to select valid sycl device" << '\n';
return EXIT_FAILURE;
}

unsigned size = 1024;
Expand All @@ -118,17 +131,8 @@ int main(int argc, char* argv[]) {
repeat_count = std::stoul(argv[3]);
}

std::unique_ptr<sycl::device_selector> selector(nullptr);
if (device_type == sycl::info::device_type::cpu) {
selector.reset(new sycl::cpu_selector);
} else if (device_type == sycl::info::device_type::gpu) {
selector.reset(new sycl::gpu_selector);
} else if (device_type == sycl::info::device_type::host) {
selector.reset(new sycl::host_selector);
}

sycl::property_list prop_list{sycl::property::queue::enable_profiling()};
sycl::queue queue(*selector.get(), sycl::async_handler{}, prop_list);
sycl::queue queue(dev, sycl::async_handler{}, prop_list);

std::cout << "DPC++ Matrix Multiplication (matrix size: " << size <<
" x " << size << ", repeats " << repeat_count << " times)" << std::endl;
Expand All @@ -148,4 +152,4 @@ int main(int argc, char* argv[]) {

std::cout << "Total execution time: " << time.count() << " sec" << std::endl;
return 0;
}
}
2 changes: 1 addition & 1 deletion samples/dpc_info/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ std::ostream& operator<<(std::ostream& out, sycl::aspect sycl_aspect) {
break;
#endif
default:
out << "<unkown-aspect: " << static_cast<std::size_t>(sycl_aspect) << ">";
out << "<unknown-aspect: " << static_cast<std::size_t>(sycl_aspect) << ">";
break;
}
return out;
Expand Down
6 changes: 4 additions & 2 deletions sdk/samples/dlworkloads/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ void PrintUsage()
std::cout << "It is a largely simpilified application to demo mixed programming ";
std::cout << "on Intel GPU for deep learning (PyTorch&TensorFlow) workloads (ITEX&IPEX) ";
std::cout << "with direct dpcpp kernel, onednn, onemkl, onedpl, onemkl, eigen, etc." << std::endl;
std::cout << "IPEX: https://github.com/intel-innersource/frameworks.ai.pytorch.ipex-gpu" << std::endl;
std::cout << "ITEX: https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow" << std::endl;
std::cout << "IPEX: https://github.com/intel/intel-extension-for-pytorch" << std::endl;
std::cout << std::endl;
std::cout << "The purpose of this application is to provide a basic rough requirement for sycl graph capture mode." << std::endl;
std::cout << std::endl;
Expand All @@ -39,6 +38,9 @@ void PrintUsage()
std::cout << std::endl;
std::cout << "It is supposed that this application will be updated frequently, so this might be not the latest one." << std::endl;
std::cout << std::endl;
#if __LIBSYCL_MAJOR_VERSION >= 7
std::cerr << "Notice: A portion of this sample was not build. To build the whole sample, revert to older oneAPI release (<= 2023.2.0)" << std::endl;
#endif
}

void run(sycl::queue *q)
Expand Down
5 changes: 5 additions & 0 deletions sdk/samples/dlworkloads/model_mixedprogramming.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,15 @@ TinyTensor run_model_mixedprogramming(TinyTensor inp, sycl::queue *q)
TinyTensor outp = run_syclkernel_operation_scaledown(inp, q);
GlobalDeviceMemoryManager().free(inp.data);

// TODO(matthew.schilling@intel.com): Fails when run with XPTI tracing. We
// need to figure out a way to uncomment this. It crashes PTI-SDK and
// Unitrace built with OneAPI/ICPX >= 2024.0.0 .
// the next operation uses oneDNN for conv2d
#if __LIBSYCL_MAJOR_VERSION < 7
inp = outp;
outp = run_onednn_operation_conv2d(inp, q);
GlobalDeviceMemoryManager().free(inp.data);
#endif

// next operation uses oneMKL
inp = outp;
Expand Down
6 changes: 4 additions & 2 deletions sdk/samples/dpc_gemm/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ static float RunAndCheck(sycl::queue queue, const std::vector<float> &a,

cgh.parallel_for<class __GEMM>(
sycl::range<2>(size, size), [=](sycl::id<2> id) {
GEMM(a_acc.get_pointer(), b_acc.get_pointer(), c_acc.get_pointer(),
size, id);
auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
GEMM(a_acc_ptr.get(), b_acc_ptr.get(), c_acc_ptr.get(), size, id);
});
});
queue.wait_and_throw();
Expand Down
6 changes: 4 additions & 2 deletions sdk/samples/dpc_gemm_threaded/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@ static float RunAndCheck(sycl::queue queue, const std::vector<float>& a,

cgh.parallel_for<class __GEMM>(
sycl::range<2>(size, size), [=](sycl::id<2> id) {
GEMM(a_acc.get_pointer(),b_acc.get_pointer(),c_acc.get_pointer(),
size, id);
auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
GEMM(a_acc_ptr.get(), b_acc_ptr.get(), c_acc_ptr.get(), size, id);
});
});
queue.wait_and_throw();
Expand Down
34 changes: 21 additions & 13 deletions sdk/samples/iso3dfd_dpcpp/src/iso3dfd_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ void Iso3dfdIterationSLM(sycl::nd_item<3> &it, float *next, float *prev,
*
*/
void Iso3dfdIterationGlobal(sycl::nd_item<3> &it, float *next, float *prev,
float *vel, const float *coeff, int nx, int nxy,
const float *vel, const float *coeff, int nx, int nxy,
int bx, int by, int z_offset, int full_end_z) {
// We compute the start and the end position in the grid
// for each work-item.
Expand Down Expand Up @@ -381,17 +381,17 @@ bool Iso3dfdDevice(sycl::queue &q, float *ptr_next, float *ptr_prev,
if (i % 2 == 0)
h.parallel_for(
nd_range(global_nd_range, local_nd_range), [=](auto it) {
Iso3dfdIterationSLM(it, next.get_pointer(), prev.get_pointer(),
vel.get_pointer(), coeff.get_pointer(),
tab.get_pointer(), nx, nxy, bx, by,
Iso3dfdIterationSLM(it, next.get(), prev.get(),
vel.get(), coeff.get(),
tab.get(), nx, nxy, bx, by,
n3_block, end_z);
});
else
h.parallel_for(
nd_range(global_nd_range, local_nd_range), [=](auto it) {
Iso3dfdIterationSLM(it, prev.get_pointer(), next.get_pointer(),
vel.get_pointer(), coeff.get_pointer(),
tab.get_pointer(), nx, nxy, bx, by,
Iso3dfdIterationSLM(it, prev.get(), next.get(),
vel.get(), coeff.get(),
tab.get(), nx, nxy, bx, by,
n3_block, end_z);
});

Expand All @@ -408,17 +408,25 @@ bool Iso3dfdDevice(sycl::queue &q, float *ptr_next, float *ptr_prev,
if (i % 2 == 0)
h.parallel_for(
nd_range(global_nd_range, local_nd_range), [=](auto it) {
Iso3dfdIterationGlobal(it, next.get_pointer(),
prev.get_pointer(), vel.get_pointer(),
coeff.get_pointer(), nx, nxy, bx, by,
auto next_ptr = next.template get_multi_ptr<sycl::access::decorated::no>();
auto prev_ptr = prev.template get_multi_ptr<sycl::access::decorated::no>();
auto vel_ptr = vel.template get_multi_ptr<sycl::access::decorated::no>();
auto coeff_ptr = coeff.template get_multi_ptr<sycl::access::decorated::no>();
Iso3dfdIterationGlobal(it, next_ptr.get(),
prev_ptr.get(), vel_ptr.get(),
coeff_ptr.get(), nx, nxy, bx, by,
n3_block, end_z);
});
else
h.parallel_for(
nd_range(global_nd_range, local_nd_range), [=](auto it) {
Iso3dfdIterationGlobal(it, prev.get_pointer(),
next.get_pointer(), vel.get_pointer(),
coeff.get_pointer(), nx, nxy, bx, by,
auto next_ptr = next.template get_multi_ptr<sycl::access::decorated::no>();
auto prev_ptr = prev.template get_multi_ptr<sycl::access::decorated::no>();
auto vel_ptr = vel.template get_multi_ptr<sycl::access::decorated::no>();
auto coeff_ptr = coeff.template get_multi_ptr<sycl::access::decorated::no>();
Iso3dfdIterationGlobal(it, prev_ptr.get(),
next_ptr.get(), vel_ptr.get(),
coeff_ptr.get(), nx, nxy, bx, by,
n3_block, end_z);
});
#endif
Expand Down
2 changes: 1 addition & 1 deletion sdk/samples/vector_sq_add/vector_sq_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) {
auto print_queue_info = [](const sycl::queue &sycl_queue) {
auto queue_type =
get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue);
#if __LIBSYCL_MAJOR_VERSION >= 6 && __LIBSYCL_MINOR_VERSION >= 2
#if __LIBSYCL_MAJOR_VERSION > 6 || (__LIBSYCL_MAJOR_VERSION == 6 && __LIBSYCL_MINOR_VERSION >= 2)
// 1 (default)
if (auto *ptr_queue_handle =
std::get_if<ze_command_list_handle_t>(&queue_type)) {
Expand Down
5 changes: 4 additions & 1 deletion sdk/test/main_dpcgemm_fixture.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ float RunAndCheck(sycl::queue queue, const std::vector<float>& a, const std::vec
auto c_acc = c_buf.get_access<sycl::access::mode::write>(cgh);

cgh.parallel_for<class __GEMM>(sycl::range<2>(size, size), [=](sycl::id<2> id) {
GEMM(a_acc.get_pointer(), b_acc.get_pointer(), c_acc.get_pointer(), size, id);
auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
GEMM(a_acc_ptr.get(), b_acc_ptr.get(), c_acc_ptr.get(), size, id);
});
});
queue.wait_and_throw();
Expand Down
30 changes: 0 additions & 30 deletions sdk/test/main_vecsqadd_fixture.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,6 @@ void RunExternalCorrIdTest(queue &q, const DoubleVector &a, const DoubleVector &
vecAdd(q, a, b, sq_add);
print_results(sq_add, vector_size);

// print_queue_info(q);

StartTracing();
vecAdd(q, c, d, sq_add2);
StopTracing();
Expand Down Expand Up @@ -259,34 +257,6 @@ void RunVecsqadd(TestType a_test_type) {
auto d_selector{gpu_selector_v};
queue q(d_selector, NULL);

// Underlying queue handle object changes based on value of
// SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=?
auto print_queue_info = [](const sycl::queue &sycl_queue) {
auto queue_type = get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue);
#if __LIBSYCL_MAJOR_VERSION >= 6 && __LIBSYCL_MINOR_VERSION >= 2
// 1 (default)
if (auto *ptr_queue_handle = std::get_if<ze_command_list_handle_t>(&queue_type)) {
printf("Queue ptr: 0x%p, native queue: 0x%p, native device: 0x%p \n", &sycl_queue,
ptr_queue_handle,
get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue.get_device()));

// 0
} else if (auto *ptr_queue_handle = std::get_if<ze_command_queue_handle_t>(&queue_type)) {
printf("Queue ptr: 0x%p, native queue: 0x%p, native device: 0x%p \n", &sycl_queue,
ptr_queue_handle,
get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue.get_device()));
} else {
std::cerr << "Underlying level zero queue handle could not be obtained." << '\n';
}
#else
printf("Queue ptr: 0x%p, native queue: 0x%p, native device: 0x%p \n", &sycl_queue,
get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue),
get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue.get_device()));
#endif
};

print_queue_info(q);

// Start Tests by Type

if (a_test_type == TestType::RUN_ALL) {
Expand Down

0 comments on commit 166eeba

Please sign in to comment.