Fix Warnings After Switching to oneAPI 2024.0 release (#43)

* Fix warnings in SYCL code due to icpx/libsycl upgrade. * Most of these are due to SYCL 2020 spec deprecations (e.g. `get_pointer` -> `get_multi_ptr`). * Prevent dlworkload from crashing but provide notice it is not working as expected. Signed-off-by: Schilling, Matthew <matthew.schilling@intel.com>
intel · Dec 13, 2023 · 166eeba · 166eeba
1 parent 4700b32
commit 166eeba
Show file tree

Hide file tree

Showing 13 changed files with 85 additions and 77 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,10 @@
 **/build
 **/__pycache__
+build*/
+venv/
+.vscode/
+.vim/
+.cache/
+compile_commands.json
+CMakeUserPresets.json
+error_diff.txt
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.49.19
+0.49.20
diff --git a/samples/dpc_gemm/CMakeLists.txt b/samples/dpc_gemm/CMakeLists.txt
@@ -6,19 +6,23 @@ if(WIN32)
   set(CMAKE_CXX_COMPILER "dpcpp-cl.exe")
   set(CMAKE_GENERATOR_TOOLSET "Intel(R) oneAPI DPC++ Compiler")
 else()
-  set(CMAKE_CXX_COMPILER "dpcpp")
+  set(CMAKE_CXX_COMPILER "icpx")
 endif()
 
 project(PTI_Samples_DPC_GEMM CXX)
 SetCompilerFlags()
 SetBuildType()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gline-tables-only")
-
 add_executable(dpc_gemm main.cc)
+
+target_compile_options(dpc_gemm PUBLIC -fsycl -gline-tables-only)
+
+# target_link_options CMake >= 3.13
+set_target_properties(dpc_gemm PROPERTIES LINK_FLAGS "-fsycl -gline-tables-only")
+
 target_include_directories(dpc_gemm
   PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
 if(CMAKE_INCLUDE_PATH)
   target_include_directories(dpc_gemm
     PUBLIC "${CMAKE_INCLUDE_PATH}")
-endif()
+endif()
diff --git a/samples/dpc_gemm/main.cc b/samples/dpc_gemm/main.cc
@@ -10,7 +10,7 @@
 
 #include "utils.h"
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 
 #define A_VALUE 0.128f
 #define B_VALUE 0.256f
@@ -63,9 +63,12 @@ static float RunAndCheck(sycl::queue queue,
 
       cgh.parallel_for<class __GEMM>(sycl::range<2>(size, size),
                       [=](sycl::id<2> id) {
-                        GEMM(a_acc.get_pointer(),
-                             b_acc.get_pointer(),
-                             c_acc.get_pointer(),
+                        auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
+                        auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
+                        auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
+                        GEMM(a_acc_ptr.get(),
+                             b_acc_ptr.get(),
+                             c_acc_ptr.get(),
                              size, id);
                       });
     });
@@ -101,11 +104,21 @@ static void Compute(sycl::queue queue,
 }
 
 int main(int argc, char* argv[]) {
-  sycl::info::device_type device_type = sycl::info::device_type::gpu;
-  if (argc > 1 && strcmp(argv[1], "cpu") == 0) {
-    device_type = sycl::info::device_type::cpu;
-  } else if (argc > 1 && strcmp(argv[1], "host") == 0) {
-    device_type = sycl::info::device_type::host;
+  sycl::device dev;
+  try {
+    dev = sycl::device(sycl::gpu_selector_v);
+    if (argc > 1 && strcmp(argv[1], "cpu") == 0) {
+      dev = sycl::device(sycl::cpu_selector_v);
+    } else if (argc > 1 && strcmp(argv[1], "host") == 0) {
+      dev = sycl::device(sycl::default_selector_v);
+    }
+  } catch (const sycl::exception& e) {
+    std::cerr << "Error: Exception caught while executing SYCL " << e.what() << '\n';
+    std::cerr << "Unable to select valid sycl device" << '\n';
+    return EXIT_FAILURE;
+  } catch (...) {
+    std::cerr << "Unable to select valid sycl device" << '\n';
+    return EXIT_FAILURE;
   }
 
   unsigned size = 1024;
@@ -118,17 +131,8 @@ int main(int argc, char* argv[]) {
     repeat_count = std::stoul(argv[3]);
   }
 
-  std::unique_ptr<sycl::device_selector> selector(nullptr);
-  if (device_type == sycl::info::device_type::cpu) {
-    selector.reset(new sycl::cpu_selector);
-  } else if (device_type == sycl::info::device_type::gpu) {
-    selector.reset(new sycl::gpu_selector);
-  } else if (device_type == sycl::info::device_type::host) {
-    selector.reset(new sycl::host_selector);
-  }
-
   sycl::property_list prop_list{sycl::property::queue::enable_profiling()};
-  sycl::queue queue(*selector.get(), sycl::async_handler{}, prop_list);
+  sycl::queue queue(dev, sycl::async_handler{}, prop_list);
 
   std::cout << "DPC++ Matrix Multiplication (matrix size: " << size <<
     " x " << size << ", repeats " << repeat_count << " times)" << std::endl;
@@ -148,4 +152,4 @@ int main(int argc, char* argv[]) {
 
   std::cout << "Total execution time: " << time.count() << " sec" << std::endl;
   return 0;
-}
+}
diff --git a/samples/dpc_info/main.cc b/samples/dpc_info/main.cc
@@ -166,7 +166,7 @@ std::ostream& operator<<(std::ostream& out, sycl::aspect sycl_aspect) {
       break;
 #endif
     default:
-      out << "<unkown-aspect: " << static_cast<std::size_t>(sycl_aspect) << ">";
+      out << "<unknown-aspect: " << static_cast<std::size_t>(sycl_aspect) << ">";
       break;
   }
   return out;

diff --git a/sdk/samples/dlworkloads/main.cpp b/sdk/samples/dlworkloads/main.cpp
@@ -21,8 +21,7 @@ void PrintUsage()
   std::cout << "It is a largely simpilified application to demo mixed programming ";
   std::cout << "on Intel GPU for deep learning (PyTorch&TensorFlow) workloads (ITEX&IPEX) ";
   std::cout << "with direct dpcpp kernel, onednn, onemkl, onedpl, onemkl, eigen, etc." << std::endl;
-  std::cout << "IPEX: https://github.com/intel-innersource/frameworks.ai.pytorch.ipex-gpu" << std::endl;
-  std::cout << "ITEX: https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow" << std::endl;
+  std::cout << "IPEX: https://github.com/intel/intel-extension-for-pytorch" << std::endl;
   std::cout << std::endl;
   std::cout << "The purpose of this application is to provide a basic rough requirement for sycl graph capture mode." << std::endl;
   std::cout << std::endl;
@@ -39,6 +38,9 @@ void PrintUsage()
   std::cout << std::endl;
   std::cout << "It is supposed that this application will be updated frequently, so this might be not the latest one." << std::endl;
   std::cout << std::endl;
+#if __LIBSYCL_MAJOR_VERSION >= 7
+  std::cerr << "Notice: A portion of this sample was not build. To build the whole sample, revert to older oneAPI release (<= 2023.2.0)" << std::endl;
+#endif
 }
 
 void run(sycl::queue *q)

diff --git a/sdk/samples/dlworkloads/model_mixedprogramming.cpp b/sdk/samples/dlworkloads/model_mixedprogramming.cpp
@@ -15,10 +15,15 @@ TinyTensor run_model_mixedprogramming(TinyTensor inp, sycl::queue *q)
   TinyTensor outp = run_syclkernel_operation_scaledown(inp, q);
   GlobalDeviceMemoryManager().free(inp.data);
 
+  // TODO(matthew.schilling@intel.com): Fails when run with XPTI tracing. We
+  // need to figure out a way to uncomment this. It crashes PTI-SDK and
+  // Unitrace built with OneAPI/ICPX >= 2024.0.0 .
   // the next operation uses oneDNN for conv2d
+#if __LIBSYCL_MAJOR_VERSION < 7
   inp = outp;
   outp = run_onednn_operation_conv2d(inp, q);
   GlobalDeviceMemoryManager().free(inp.data);
+#endif
 
   // next operation uses oneMKL
   inp = outp;

diff --git a/sdk/samples/dpc_gemm/main.cc b/sdk/samples/dpc_gemm/main.cc
@@ -80,8 +80,10 @@ static float RunAndCheck(sycl::queue queue, const std::vector<float> &a,
 
       cgh.parallel_for<class __GEMM>(
           sycl::range<2>(size, size), [=](sycl::id<2> id) {
-            GEMM(a_acc.get_pointer(), b_acc.get_pointer(), c_acc.get_pointer(),
-                 size, id);
+            auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
+            auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
+            auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
+            GEMM(a_acc_ptr.get(), b_acc_ptr.get(), c_acc_ptr.get(), size, id);
           });
     });
     queue.wait_and_throw();

diff --git a/sdk/samples/dpc_gemm_threaded/main.cc b/sdk/samples/dpc_gemm_threaded/main.cc
@@ -68,8 +68,10 @@ static float RunAndCheck(sycl::queue queue, const std::vector<float>& a,
 
       cgh.parallel_for<class __GEMM>(
           sycl::range<2>(size, size), [=](sycl::id<2> id) {
-            GEMM(a_acc.get_pointer(),b_acc.get_pointer(),c_acc.get_pointer(),
-                 size, id);
+            auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
+            auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
+            auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
+            GEMM(a_acc_ptr.get(), b_acc_ptr.get(), c_acc_ptr.get(), size, id);
           });
     });
     queue.wait_and_throw();

diff --git a/sdk/samples/iso3dfd_dpcpp/src/iso3dfd_kernels.cpp b/sdk/samples/iso3dfd_dpcpp/src/iso3dfd_kernels.cpp
@@ -196,7 +196,7 @@ void Iso3dfdIterationSLM(sycl::nd_item<3> &it, float *next, float *prev,
  *
  */
 void Iso3dfdIterationGlobal(sycl::nd_item<3> &it, float *next, float *prev,
-                            float *vel, const float *coeff, int nx, int nxy,
+                            const float *vel, const float *coeff, int nx, int nxy,
                             int bx, int by, int z_offset, int full_end_z) {
   // We compute the start and the end position in the grid
   // for each work-item.
@@ -381,17 +381,17 @@ bool Iso3dfdDevice(sycl::queue &q, float *ptr_next, float *ptr_prev,
         if (i % 2 == 0)
           h.parallel_for(
               nd_range(global_nd_range, local_nd_range), [=](auto it) {
-                Iso3dfdIterationSLM(it, next.get_pointer(), prev.get_pointer(),
-                                    vel.get_pointer(), coeff.get_pointer(),
-                                    tab.get_pointer(), nx, nxy, bx, by,
+                Iso3dfdIterationSLM(it, next.get(), prev.get(),
+                                    vel.get(), coeff.get(),
+                                    tab.get(), nx, nxy, bx, by,
                                     n3_block, end_z);
               });
         else
           h.parallel_for(
               nd_range(global_nd_range, local_nd_range), [=](auto it) {
-                Iso3dfdIterationSLM(it, prev.get_pointer(), next.get_pointer(),
-                                    vel.get_pointer(), coeff.get_pointer(),
-                                    tab.get_pointer(), nx, nxy, bx, by,
+                Iso3dfdIterationSLM(it, prev.get(), next.get(),
+                                    vel.get(), coeff.get(),
+                                    tab.get(), nx, nxy, bx, by,
                                     n3_block, end_z);
               });
 
@@ -408,17 +408,25 @@ bool Iso3dfdDevice(sycl::queue &q, float *ptr_next, float *ptr_prev,
         if (i % 2 == 0)
           h.parallel_for(
               nd_range(global_nd_range, local_nd_range), [=](auto it) {
-                Iso3dfdIterationGlobal(it, next.get_pointer(),
-                                       prev.get_pointer(), vel.get_pointer(),
-                                       coeff.get_pointer(), nx, nxy, bx, by,
+                auto next_ptr = next.template get_multi_ptr<sycl::access::decorated::no>();
+                auto prev_ptr = prev.template get_multi_ptr<sycl::access::decorated::no>();
+                auto vel_ptr = vel.template get_multi_ptr<sycl::access::decorated::no>();
+                auto coeff_ptr = coeff.template get_multi_ptr<sycl::access::decorated::no>();
+                Iso3dfdIterationGlobal(it, next_ptr.get(),
+                                       prev_ptr.get(), vel_ptr.get(),
+                                       coeff_ptr.get(), nx, nxy, bx, by,
                                        n3_block, end_z);
               });
         else
           h.parallel_for(
               nd_range(global_nd_range, local_nd_range), [=](auto it) {
-                Iso3dfdIterationGlobal(it, prev.get_pointer(),
-                                       next.get_pointer(), vel.get_pointer(),
-                                       coeff.get_pointer(), nx, nxy, bx, by,
+                auto next_ptr = next.template get_multi_ptr<sycl::access::decorated::no>();
+                auto prev_ptr = prev.template get_multi_ptr<sycl::access::decorated::no>();
+                auto vel_ptr = vel.template get_multi_ptr<sycl::access::decorated::no>();
+                auto coeff_ptr = coeff.template get_multi_ptr<sycl::access::decorated::no>();
+                Iso3dfdIterationGlobal(it, prev_ptr.get(),
+                                       next_ptr.get(), vel_ptr.get(),
+                                       coeff_ptr.get(), nx, nxy, bx, by,
                                        n3_block, end_z);
               });
 #endif

diff --git a/sdk/samples/vector_sq_add/vector_sq_add.cpp b/sdk/samples/vector_sq_add/vector_sq_add.cpp
@@ -245,7 +245,7 @@ int main(int argc, char *argv[]) {
     auto print_queue_info = [](const sycl::queue &sycl_queue) {
       auto queue_type =
           get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue);
-#if __LIBSYCL_MAJOR_VERSION >= 6 && __LIBSYCL_MINOR_VERSION >= 2
+#if __LIBSYCL_MAJOR_VERSION > 6 || (__LIBSYCL_MAJOR_VERSION == 6 && __LIBSYCL_MINOR_VERSION >= 2)
       // 1 (default)
       if (auto *ptr_queue_handle =
               std::get_if<ze_command_list_handle_t>(&queue_type)) {

diff --git a/sdk/test/main_dpcgemm_fixture.cc b/sdk/test/main_dpcgemm_fixture.cc
@@ -101,7 +101,10 @@ float RunAndCheck(sycl::queue queue, const std::vector<float>& a, const std::vec
       auto c_acc = c_buf.get_access<sycl::access::mode::write>(cgh);
 
       cgh.parallel_for<class __GEMM>(sycl::range<2>(size, size), [=](sycl::id<2> id) {
-        GEMM(a_acc.get_pointer(), b_acc.get_pointer(), c_acc.get_pointer(), size, id);
+        auto a_acc_ptr = a_acc.get_multi_ptr<sycl::access::decorated::no>();
+        auto b_acc_ptr = b_acc.get_multi_ptr<sycl::access::decorated::no>();
+        auto c_acc_ptr = c_acc.get_multi_ptr<sycl::access::decorated::no>();
+        GEMM(a_acc_ptr.get(), b_acc_ptr.get(), c_acc_ptr.get(), size, id);
       });
     });
     queue.wait_and_throw();

diff --git a/sdk/test/main_vecsqadd_fixture.cc b/sdk/test/main_vecsqadd_fixture.cc
@@ -214,8 +214,6 @@ void RunExternalCorrIdTest(queue &q, const DoubleVector &a, const DoubleVector &
   vecAdd(q, a, b, sq_add);
   print_results(sq_add, vector_size);
 
-  // print_queue_info(q);
-
   StartTracing();
   vecAdd(q, c, d, sq_add2);
   StopTracing();
@@ -259,34 +257,6 @@ void RunVecsqadd(TestType a_test_type) {
   auto d_selector{gpu_selector_v};
   queue q(d_selector, NULL);
 
-  // Underlying queue handle object changes based on value of
-  // SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=?
-  auto print_queue_info = [](const sycl::queue &sycl_queue) {
-    auto queue_type = get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue);
-#if __LIBSYCL_MAJOR_VERSION >= 6 && __LIBSYCL_MINOR_VERSION >= 2
-    // 1 (default)
-    if (auto *ptr_queue_handle = std::get_if<ze_command_list_handle_t>(&queue_type)) {
-      printf("Queue  ptr: 0x%p, native queue: 0x%p, native device: 0x%p \n", &sycl_queue,
-             ptr_queue_handle,
-             get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue.get_device()));
-
-      // 0
-    } else if (auto *ptr_queue_handle = std::get_if<ze_command_queue_handle_t>(&queue_type)) {
-      printf("Queue  ptr: 0x%p, native queue: 0x%p, native device: 0x%p \n", &sycl_queue,
-             ptr_queue_handle,
-             get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue.get_device()));
-    } else {
-      std::cerr << "Underlying level zero queue handle could not be obtained." << '\n';
-    }
-#else
-    printf("Queue  ptr: 0x%p, native queue: 0x%p, native device: 0x%p \n", &sycl_queue,
-           get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue),
-           get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue.get_device()));
-#endif
-  };
-
-  print_queue_info(q);
-
   // Start Tests by Type
 
   if (a_test_type == TestType::RUN_ALL) {