From 3eb1ff7047d8263e55c141922f81962df920f274 Mon Sep 17 00:00:00 2001 From: tbbdev Date: Fri, 10 Mar 2023 08:33:18 -0600 Subject: [PATCH] Commit oneTBB source code 3b16143 --- .bazelversion | 2 +- BUILD.bazel | 8 +- Bazel.md | 2 +- CMakeLists.txt | 47 ++- INSTALL.md | 13 + README.md | 2 + RELEASE_NOTES.md | 23 +- SYSTEM_REQUIREMENTS.md | 7 + cmake/compilers/GNU.cmake | 5 +- cmake/sanitize.cmake | 3 +- doc/main/reference/reference.rst | 2 - .../Flow-Graph-exception-tips.rst | 17 + .../tbb_userguide/Flow-Graph-waiting-tips.rst | 11 + .../Guiding_Task_Scheduler_Execution.rst | 5 +- doc/main/tbb_userguide/Memory_Allocation.rst | 2 +- .../Migration_Guide/Task_API.rst | 2 +- .../Working_on_the_Assembly_Line_pipeline.rst | 14 +- .../automatically-replacing-malloc.rst | 22 + .../snippets/flow_graph_examples.cpp | 4 - examples/README.md | 21 +- include/oneapi/tbb/concurrent_hash_map.h | 14 +- include/oneapi/tbb/concurrent_lru_cache.h | 14 +- include/oneapi/tbb/concurrent_queue.h | 54 ++- include/oneapi/tbb/concurrent_vector.h | 2 +- include/oneapi/tbb/detail/_aggregator.h | 7 +- .../tbb/detail/_concurrent_queue_base.h | 16 +- .../oneapi/tbb/detail/_concurrent_skip_list.h | 1 + .../tbb/detail/_concurrent_unordered_base.h | 2 +- include/oneapi/tbb/detail/_config.h | 8 +- .../oneapi/tbb/detail/_flow_graph_join_impl.h | 20 +- .../oneapi/tbb/detail/_flow_graph_node_impl.h | 4 +- include/oneapi/tbb/detail/_segment_table.h | 15 +- include/oneapi/tbb/flow_graph.h | 8 +- include/oneapi/tbb/info.h | 12 +- include/oneapi/tbb/parallel_for.h | 5 +- include/oneapi/tbb/parallel_for_each.h | 10 +- include/oneapi/tbb/parallel_reduce.h | 6 + include/oneapi/tbb/partitioner.h | 1 + include/oneapi/tbb/task_arena.h | 9 - include/oneapi/tbb/version.h | 4 +- python/rml/ipc_server.cpp | 4 +- src/tbb/CMakeLists.txt | 4 +- src/tbb/arena.cpp | 10 - src/tbb/arena_slot.h | 2 +- src/tbb/itt_notify.h | 5 +- src/tbb/main.cpp | 1 + src/tbb/rml_thread_monitor.h | 20 +- src/tbb/semaphore.h | 53 +-- 
src/tbb/task_stream.h | 9 +- src/tbb/tbb.rc | 41 +- src/tbb/tools_api/disable_warnings.h | 8 +- src/tbb/tools_api/ittnotify.h | 168 +++++++- src/tbb/tools_api/ittnotify_config.h | 128 +++++- src/tbb/tools_api/ittnotify_static.c | 384 ++++++++++++++---- src/tbb/tools_api/ittnotify_static.h | 11 +- src/tbb/tools_api/ittnotify_types.h | 2 +- src/tbb/tools_api/legacy/ittnotify.h | 35 +- src/tbbbind/CMakeLists.txt | 4 +- src/tbbbind/tbb_bind.rc | 41 +- src/tbbmalloc/CMakeLists.txt | 4 +- src/tbbmalloc/Customize.h | 4 +- src/tbbmalloc/frontend.cpp | 4 +- src/tbbmalloc/tbbmalloc.rc | 41 +- src/tbbmalloc_proxy/tbbmalloc_proxy.rc | 41 +- test/CMakeLists.txt | 18 +- test/common/common_arena_constraints.h | 4 +- test/common/concurrent_lru_cache_common.h | 14 +- test/common/doctest.h | 4 + test/common/parallel_for_each_common.h | 6 +- test/common/utils.h | 40 +- .../conformance_arena_constraints.cpp | 17 +- test/conformance/conformance_flowgraph.h | 12 +- test/conformance/conformance_join_node.cpp | 6 +- test/tbb/test_arena_constraints.cpp | 17 +- test/tbb/test_collaborative_call_once.cpp | 3 + test/tbb/test_concurrent_lru_cache.cpp | 28 +- test/tbb/test_concurrent_queue_whitebox.cpp | 25 +- test/tbb/test_eh_flow_graph.cpp | 4 +- test/tbb/test_eh_thread.cpp | 49 +-- test/tbb/test_indexer_node.cpp | 2 +- test/tbb/test_join_node.h | 2 +- test/tbb/test_limiter_node.cpp | 4 +- test/tbb/test_numa_dist.cpp | 4 + test/tbb/test_parallel_sort.cpp | 11 +- test/tbb/test_partitioner.cpp | 5 + test/tbbmalloc/test_malloc_overload.cpp | 17 +- third-party-programs.txt | 4 +- 87 files changed, 1121 insertions(+), 623 deletions(-) create mode 100644 doc/main/tbb_userguide/Flow-Graph-exception-tips.rst create mode 100644 doc/main/tbb_userguide/Flow-Graph-waiting-tips.rst create mode 100644 doc/main/tbb_userguide/automatically-replacing-malloc.rst diff --git a/.bazelversion b/.bazelversion index 0062ac9718..09b254e90c 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -5.0.0 +6.0.0 diff 
--git a/BUILD.bazel b/BUILD.bazel index 7aecb3e067..3881d6843d 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -36,7 +36,7 @@ cc_library( "include/oneapi/tbb/detail/*.h", ]), copts = ["-w"] + select({ - "@bazel_tools//platforms:windows": [""], + "@platforms//os:windows": [""], "//conditions:default": ["-mwaitpkg"], }), defines = @@ -47,7 +47,7 @@ cc_library( ], }) + select({ - "@bazel_tools//platforms:osx": ["_XOPEN_SOURCE"], + "@platforms//os:osx": ["_XOPEN_SOURCE"], "//conditions:default": [], }), includes = [ @@ -55,8 +55,8 @@ cc_library( ], linkopts = select({ - "@bazel_tools//platforms:windows": [], - "@bazel_tools//platforms:linux": [ + "@platforms//os:windows": [], + "@platforms//os:linux": [ "-ldl", "-pthread", "-lrt", diff --git a/Bazel.md b/Bazel.md index 702e05491c..996a3b2eb5 100644 --- a/Bazel.md +++ b/Bazel.md @@ -33,7 +33,7 @@ example _WORKSPACE.bazel_: ```python -load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") git_repository( name = "oneTBB", diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b8bb7c8be..a24287b1db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -216,19 +216,17 @@ else() if (TBB_BUILD) add_subdirectory(src/tbb) endif() - if (NOT "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "mips") - if (TBBMALLOC_BUILD) - add_subdirectory(src/tbbmalloc) - if(TBBMALLOC_PROXY_BUILD AND NOT "${MSVC_CXX_ARCHITECTURE_ID}" MATCHES "ARM64") - add_subdirectory(src/tbbmalloc_proxy) - endif() - endif() - if (APPLE OR NOT BUILD_SHARED_LIBS) - message(STATUS "TBBBind build targets are disabled due to unsupported environment") - else() - add_subdirectory(src/tbbbind) + if (TBBMALLOC_BUILD) + add_subdirectory(src/tbbmalloc) + if(TBBMALLOC_PROXY_BUILD AND NOT "${MSVC_CXX_ARCHITECTURE_ID}" MATCHES "ARM64") + add_subdirectory(src/tbbmalloc_proxy) endif() endif() + if (APPLE OR NOT BUILD_SHARED_LIBS) + message(STATUS "TBBBind build targets are disabled due to unsupported environment") + else() + add_subdirectory(src/tbbbind) + endif() # ------------------------------------------------------------------- # Installation instructions @@ -279,10 +277,27 @@ endif() if (ANDROID_PLATFORM) if ("${ANDROID_STL}" STREQUAL "c++_shared") - configure_file( - "${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/${ANDROID_ABI}/libc++_shared.so" - "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libc++_shared.so" - COPYONLY) + if (${ANDROID_NDK_MAJOR} GREATER_EQUAL "25") + if(ANDROID_ABI STREQUAL "arm64-v8a") + set(ANDROID_TOOLCHAIN_NAME "aarch64-linux-android") + elseif(ANDROID_ABI STREQUAL "x86_64") + set(ANDROID_TOOLCHAIN_NAME "x86_64-linux-android") + elseif(ANDROID_ABI STREQUAL "armeabi-v7a") + set(ANDROID_TOOLCHAIN_NAME "arm-linux-androideabi") + elseif(ANDROID_ABI STREQUAL "x86") + set(ANDROID_TOOLCHAIN_NAME "i686-linux-android") + endif() + + configure_file( + "${ANDROID_TOOLCHAIN_ROOT}/sysroot/usr/lib/${ANDROID_TOOLCHAIN_NAME}/libc++_shared.so" + "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libc++_shared.so" + COPYONLY) + else() + configure_file( + "${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/${ANDROID_ABI}/libc++_shared.so" + 
"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libc++_shared.so" + COPYONLY) + endif() endif() # This custom target may be implemented without separate CMake script, but it requires # ADB(Android Debug Bridge) executable file availability, so to incapsulate this requirement diff --git a/INSTALL.md b/INSTALL.md index 6fff47a165..3c63c9fd84 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -77,6 +77,19 @@ cmake .. cpack ``` +## Installation from vcpkg + +You can download and install oneTBB using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: +```sh + git clone https://github.com/Microsoft/vcpkg.git + cd vcpkg + ./bootstrap-vcpkg.sh #.\bootstrap-vcpkg.bat(for Windows) + ./vcpkg integrate install + ./vcpkg install tbb +``` + +The oneTBB port in vcpkg is kept up to date by Microsoft* team members and community contributors. If the version is out of date, create an issue or pull request on the [vcpkg repository](https://github.com/Microsoft/vcpkg). + ## Example of Installation ### Single-configuration generators diff --git a/README.md b/README.md index e51f1c4585..aec8d2aae7 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,8 @@ Refer to oneTBB [examples](examples) and [samples](https://github.com/oneapi-src oneTBB is a part of [oneAPI](https://oneapi.io). The current branch implements version 1.1 of oneAPI Specification. +> **_NOTE:_** Threading Building Blocks (TBB) is now called oneAPI Threading Building Blocks (oneTBB) to highlight that the tool is a part of the oneAPI ecosystem. + ## Release Information Here are [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQUIREMENTS.md). diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 6e7093059a..dcd9f732f4 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -18,36 +18,25 @@ This document contains changes of oneTBB compared to the last release. 
## Table of Contents -- [New Features](#new_features) - [Known Limitations](#known-limitations) - [Fixed Issues](#fixed-issues) - [Open-source Contributions Integrated](#open-source-contributions-integrated) -## :white_check_mark: New Features -- Improved support and use of the latest C++ standards for parallel_sort that allows using this algorithm with user-defined and standard library-defined objects with modern semantics. -- The following features are now fully functional: task_arena extensions, collaborative_call_once, adaptive mutexes, heterogeneous overloads for concurrent_hash_map, and task_scheduler_handle. -- Added support for Windows* Server 2022 and Python 3.10. - ## :rotating_light: Known Limitations +- A static assert causes compilation failures in oneTBB headers when compiling with Clang* 12.0.0 or newer if using the LLVM* standard library with -ffreestanding and C++11/14 compiler options. - An application using Parallel STL algorithms in libstdc++ versions 9 and 10 may fail to compile due to incompatible interface changes between earlier versions of Threading Building Blocks (TBB) and oneAPI Threading Building Blocks (oneTBB). Disable support for Parallel STL algorithms by defining PSTL_USE_PARALLEL_POLICIES (in libstdc++ 9) or _GLIBCXX_USE_TBB_PAR_BACKEND (in libstdc++ 10) macro to zero before inclusion of the first standard header file in each translation unit. - On Linux* OS, if oneAPI Threading Building Blocks (oneTBB) or Threading Building Blocks (TBB) are installed in a system folder like /usr/lib64, the application may fail to link due to the order in which the linker searches for libraries. Use the -L linker option to specify the correct location of oneTBB library. This issue does not affect the program execution. - The oneapi::tbb::info namespace interfaces might unexpectedly change the process affinity mask on Windows* OS systems (see https://github.com/open-mpi/hwloc/issues/366 for details) when using hwloc version lower than 2.5. 
-- Using a hwloc version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows OS. See https://github.com/open-mpi/hwloc/issues/477 for details. +- Using a hwloc version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows* OS. See https://github.com/open-mpi/hwloc/issues/477 for details. - The NUMA topology may be detected incorrectly on Windows OS machines where the number of NUMA node threads exceeds the size of 1 processor group. - On Windows OS on ARM64*, when compiling an application using oneTBB with the Microsoft* Compiler, the compiler issues a warning C4324 that a structure was padded due to the alignment specifier. Consider suppressing the warning by specifying /wd4324 to the compiler command line. - oneTBB does not support fork(), to work-around the issue, consider using task_scheduler_handle to join oneTBB worker threads before using fork(). - C++ exception handling mechanism on Windows* OS on ARM64* might corrupt memory if an exception is thrown from any oneTBB parallel algorithm (see Windows* OS on ARM64* compiler issue: https://developercommunity.visualstudio.com/t/ARM64-incorrect-stack-unwinding-for-alig/1544293). ## :hammer: Fixed Issues -- Memory allocator crash on a system with an incomplete /proc/meminfo (GitHub* [#584](https://github.com/oneapi-src/oneTBB/issues/584)). -- Incorrect blocking of task stealing (GitHub* #[478](https://github.com/oneapi-src/oneTBB/issues/478)). -- Hang due to incorrect decrement of a limiter_node (GitHub* [#634](https://github.com/oneapi-src/oneTBB/issues/634)). -- Memory corruption in some rare cases when passing big messages in a flow graph (GitHub* [#639](https://github.com/oneapi-src/oneTBB/issues/639)). -- Possible deadlock in a throwable flow graph node with a lightweight policy. The lightweight policy is now ignored for functors that can throw exceptions (GitHub* [#420](https://github.com/oneapi-src/oneTBB/issues/420)). 
-- Crash when obtaining a range from empty ordered and unordered containers (GitHub* [#641](https://github.com/oneapi-src/oneTBB/issues/641)). -- Deadlock in a concurrent_vector resize() that could happen when the new size is less than the previous size (GitHub* [#733](https://github.com/oneapi-src/oneTBB/issues/733)). +- Memory allocator crash when allocating ~1TB on 64-bit systems (GitHub* [#838](https://github.com/oneapi-src/oneTBB/issues/838)). +- Fixed thread distribution over NUMA nodes on Windows* OS systems. +- For oneapi::tbb::suspend, it is guaranteed that the user-specified callable object is executed by the calling thread. ## :octocat: Open-source Contributions Integrated -- Improved aligned memory allocation. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/pull/671). -- Optimized usage of atomic_fence on IA-32 and Intel(R) 64 architectures. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/pull/328). -- Fixed incorrect definition of the assignment operator in containers. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/issues/372). +- Fix for full LTO* build, library and tests, on UNIX* OS systems. Contributed by Vladislav Shchapov (https://github.com/oneapi-src/oneTBB/pull/798). 
diff --git a/SYSTEM_REQUIREMENTS.md b/SYSTEM_REQUIREMENTS.md index c6a6c0bea5..4e4e8e80c9 100644 --- a/SYSTEM_REQUIREMENTS.md +++ b/SYSTEM_REQUIREMENTS.md @@ -21,6 +21,7 @@ This document provides details about hardware, operating system, and software pr - [Supported Hardware](#supported-hardware) - [Software](#software) - [Supported Operating Systems](#supported-operating-systems) + - [Community-Supported Platforms](#community-supported-platforms) - [Supported Compilers](#supported-compilers) @@ -54,6 +55,12 @@ This document provides details about hardware, operating system, and software pr - Systems with Android* operating systems - Android* 9 +### Community-Supported Platforms +- MinGW* +- FreeBSD* +- Microsoft* Windows* on ARM*/ARM64* +- macOS* on ARM64* + ### Supported Compilers - Intel* oneAPI DPC++/C++ Compiler - Intel* C++ Compiler 19.0 and 19.1 version diff --git a/cmake/compilers/GNU.cmake b/cmake/compilers/GNU.cmake index cd76acfe1e..34c10db076 100644 --- a/cmake/compilers/GNU.cmake +++ b/cmake/compilers/GNU.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -68,6 +68,9 @@ if (MINGW AND CMAKE_SYSTEM_PROCESSOR MATCHES "i.86") list (APPEND TBB_COMMON_COMPILE_FLAGS -msse2) endif () +# Gnu flags to prevent compiler from optimizing out security checks +set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv) + # TBB malloc settings set(TBBMALLOC_LIB_COMPILE_FLAGS -fno-rtti -fno-exceptions) set(TBB_OPENMP_FLAG -fopenmp) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 10e3873dfb..d07b32986a 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,4 +39,5 @@ set(TBB_TESTS_ENVIRONMENT ${TBB_TESTS_ENVIRONMENT} "LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_SOURCE_DIR}/cmake/suppressions/lsan.suppressions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TBB_SANITIZE_OPTION}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${TBB_SANITIZE_OPTION}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${TBB_SANITIZE_OPTION}") diff --git a/doc/main/reference/reference.rst b/doc/main/reference/reference.rst index 8990508206..9c8bca526a 100644 --- a/doc/main/reference/reference.rst +++ b/doc/main/reference/reference.rst @@ -47,7 +47,5 @@ The key properties of a preview feature are: scalable_memory_pools helpers_for_expressing_graphs concurrent_lru_cache_cls - constraints_extensions - info_namespace_extensions task_group_extensions custom_mutex_chmap diff --git a/doc/main/tbb_userguide/Flow-Graph-exception-tips.rst b/doc/main/tbb_userguide/Flow-Graph-exception-tips.rst new file mode 100644 index 0000000000..88b0aea35d --- /dev/null +++ b/doc/main/tbb_userguide/Flow-Graph-exception-tips.rst @@ -0,0 +1,17 @@ +.. 
_Flow_Graph_exception_tips: + +Flow Graph Tips for Exception Handling and Cancellation +======================================================= + + +The execution of a flow graph can be canceled directly or as a result of +an exception that propagates beyond a node's body. You can then +optionally reset the graph so that it can be re-executed. + +.. toctree:: + :maxdepth: 4 + + ../tbb_userguide/catching_exceptions + ../tbb_userguide/cancel_a_graph + ../tbb_userguide/use_graph_reset + ../tbb_userguide/cancelling_nested_parallelism diff --git a/doc/main/tbb_userguide/Flow-Graph-waiting-tips.rst b/doc/main/tbb_userguide/Flow-Graph-waiting-tips.rst new file mode 100644 index 0000000000..176fe7bb89 --- /dev/null +++ b/doc/main/tbb_userguide/Flow-Graph-waiting-tips.rst @@ -0,0 +1,11 @@ +.. _Flow_Graph_waiting_tips: + +Flow Graph Tips for Waiting for and Destroying a Flow Graph +=========================================================== + +.. toctree:: + :maxdepth: 4 + + ../tbb_userguide/always_use_wait_for_all + ../tbb_userguide/avoid_dynamic_node_removal + ../tbb_userguide/destroy_graphs_outside_main_thread diff --git a/doc/main/tbb_userguide/Guiding_Task_Scheduler_Execution.rst b/doc/main/tbb_userguide/Guiding_Task_Scheduler_Execution.rst index 3d770c794b..b0b33fe498 100644 --- a/doc/main/tbb_userguide/Guiding_Task_Scheduler_Execution.rst +++ b/doc/main/tbb_userguide/Guiding_Task_Scheduler_Execution.rst @@ -61,8 +61,9 @@ assign a NUMA node identifier to the ``task_arena::constraints::numa_id`` field. The processors with `Intel® Hybrid Technology `_ contain several core types, each is suited for different purposes. -For example, some applications may improve their performance by preferring execution on the most performant cores. -To set execution preference, assign specific core type identifier to the ``task_arena::constraints::core_type`` field. +In most cases, systems with hybrid CPU architecture show reasonable performance without involving additional API calls. 
+However, in some exceptional scenarios, performance may be tuned by setting the preferred core type. +To set the preferred core type for the execution, assign a specific core type identifier to the ``task_arena::constraints::core_type`` field. The example shows how to set the most performant core type as preferable for work execution: diff --git a/doc/main/tbb_userguide/Memory_Allocation.rst b/doc/main/tbb_userguide/Memory_Allocation.rst index 2ea4f7404d..745f0258db 100644 --- a/doc/main/tbb_userguide/Memory_Allocation.rst +++ b/doc/main/tbb_userguide/Memory_Allocation.rst @@ -68,4 +68,4 @@ redirect the standard routines to these functions. ../tbb_userguide/Which_Dynamic_Libraries_to_Use ../tbb_userguide/Allocator_Configuration - ../tbb_userguide/Automically_Replacing_malloc + ../tbb_userguide/automatically-replacing-malloc diff --git a/doc/main/tbb_userguide/Migration_Guide/Task_API.rst b/doc/main/tbb_userguide/Migration_Guide/Task_API.rst index cdf922a316..e96e4324b3 100644 --- a/doc/main/tbb_userguide/Migration_Guide/Task_API.rst +++ b/doc/main/tbb_userguide/Migration_Guide/Task_API.rst @@ -361,7 +361,7 @@ is not guaranteed to be executed next by the current thread. tbb::task_spawn(child); - root.wait_for_all();; + root.wait_for_all(); } In oneTBB, this can be done using ``oneapi::tbb::task_group``. diff --git a/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst b/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst index 75e3d6be1e..939f713cd3 100644 --- a/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst +++ b/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst @@ -115,13 +115,13 @@ the overhead of copying a ``TextSlice``. 
oneapi::tbb::parallel_pipeline( ntoken, oneapi::tbb::make_filter<void,TextSlice*>( - oneapi::tbb::filter::serial_in_order, MyInputFunc(input_file) ) + oneapi::tbb::filter_mode::serial_in_order, MyInputFunc(input_file) ) & oneapi::tbb::make_filter<TextSlice*,TextSlice*>( - oneapi::tbb::filter::parallel, MyTransformFunc() ) + oneapi::tbb::filter_mode::parallel, MyTransformFunc() ) & oneapi::tbb::make_filter<TextSlice*,void>( - oneapi::tbb::filter::serial_in_order, MyOutputFunc(output_file) ) ); + oneapi::tbb::filter_mode::serial_in_order, MyOutputFunc(output_file) ) ); } @@ -172,13 +172,13 @@ equivalent version of the previous example that does this follows: void RunPipeline( int ntoken, FILE* input_file, FILE* output_file ) { - oneapi::tbb::filter<void,TextSlice*> f1( oneapi::tbb::filter::serial_in_order, + oneapi::tbb::filter<void,TextSlice*> f1( oneapi::tbb::filter_mode::serial_in_order, MyInputFunc(input_file) ); - oneapi::tbb::filter<TextSlice*,TextSlice*> f2(oneapi::tbb::filter::parallel, + oneapi::tbb::filter<TextSlice*,TextSlice*> f2(oneapi::tbb::filter_mode::parallel, MyTransformFunc() ); - oneapi::tbb::filter<TextSlice*,void> f3(oneapi::tbb::filter::serial_in_order, + oneapi::tbb::filter<TextSlice*,void> f3(oneapi::tbb::filter_mode::serial_in_order, MyOutputFunc(output_file) ); - oneapi::tbb::filter<void,void> f = f1 & f2 & f3; + oneapi::tbb::filter<void,void> f = f1 & f2 & f3; oneapi::tbb::parallel_pipeline(ntoken,f); } diff --git a/doc/main/tbb_userguide/automatically-replacing-malloc.rst b/doc/main/tbb_userguide/automatically-replacing-malloc.rst new file mode 100644 index 0000000000..307884221d --- /dev/null +++ b/doc/main/tbb_userguide/automatically-replacing-malloc.rst @@ -0,0 +1,22 @@ +.. _automatically-replacing-malloc: + +Automatically Replacing ``malloc`` and Other C/C++ Functions for Dynamic Memory Allocation +========================================================================================== + + +On Windows*, Linux\* operating systems, it is possible to automatically +replace all calls to standard functions for dynamic memory allocation +(such as ``malloc``) with the |full_name| scalable equivalents. 
+Doing so can sometimes improve application performance. + + +Replacements are provided by the proxy library (the library names can be +found in platform-specific sections below). A proxy library and a +scalable memory allocator library should be taken from the same release +of oneTBB, otherwise the libraries may be mutually incompatible. + +.. toctree:: + :maxdepth: 4 + + ../tbb_userguide/Windows_C_Dynamic_Memory_Interface_Replacement + ../tbb_userguide/Linux_C_Dynamic_Memory_Interface_Replacement diff --git a/doc/main/tbb_userguide/snippets/flow_graph_examples.cpp b/doc/main/tbb_userguide/snippets/flow_graph_examples.cpp index c1b97975c3..72c594c5c7 100644 --- a/doc/main/tbb_userguide/snippets/flow_graph_examples.cpp +++ b/doc/main/tbb_userguide/snippets/flow_graph_examples.cpp @@ -17,10 +17,6 @@ /* Flow Graph Code Example for the Userguide. */ -//! Enable extended task_arena constraints feature for supporting Intel Hybrid Technology -//! and Intel Hyper-Threading Technology. -#define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1 - #include #include diff --git a/examples/README.md b/examples/README.md index 49b2a7cf5c..318d2d93da 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,17 +3,17 @@ This directory contains example usages of oneAPI Threading Building Blocks. | Code sample name | Description |:--- |:--- -| getting_started/sub_string_finder | Example referenced by the [oneAPI Threading Building Blocks Get Started Guide](https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-onetbb/top.html). Finds largest matching substrings. +| getting_started/sub_string_finder | Example referenced by the [oneAPI Threading Building Blocks Get Started Guide](https://oneapi-src.github.io/oneTBB/GSG/get_started.html#get-started-guide). Finds the largest matching substrings. | concurrent_hash_map/count_strings | Concurrently inserts strings into a `concurrent_hash_map` container. 
| concurrent_priority_queue/shortpath | Solves the single source shortest path problem using a `concurrent_priority_queue` container. -| graph/binpack | A solution to the binpacking problem using a `queue_node`, a `buffer_node` and `function_node`s. +| graph/binpack | A solution to the binpacking problem using a `queue_node`, a `buffer_node`, and `function_node`s. | graph/cholesky | Several versions of Cholesky Factorization algorithm implementation. -| graph/dining_philosophers | An implementation of dining philosophers in graph using the reserving `join_node`. +| graph/dining_philosophers | An implementation of dining philosophers in a graph using the reserving `join_node`. | graph/fgbzip2 | A parallel implementation of bzip2 block-sorting file compressor. | graph/logic_sim | An example of a collection of digital logic gates that can be easily composed into larger circuits. | graph/som | An example of a Kohonen Self-Organizing Map using cancellation. | parallel_for/game_of_life | Game of life overlay. -| parallel_for/polygon_overlay | polygon overlay. +| parallel_for/polygon_overlay | Polygon overlay. | parallel_for/seismic | Parallel seismic wave simulation. | parallel_for/tachyon | Parallel 2-D raytracer/renderer. | parallel_for_each/parallel_preorder | Parallel preorder traversal of a graph. @@ -25,16 +25,17 @@ This directory contains example usages of oneAPI Threading Building Blocks. | test_all/fibonacci | Compute Fibonacci numbers in different ways. ## System Requirements -"Refer to the [System Requirements](https://software.intel.com/content/www/us/en/develop/articles/intel-oneapi-threading-building-blocks-system-requirements.html) for the list of supported hardware and software". +Refer to the [System Requirements](https://github.com/oneapi-src/oneTBB/blob/master/SYSTEM_REQUIREMENTS.md) for the list of supported hardware and software. 
### Graphical User Interface (GUI) -Some examples (fractal, seismic, tachyon, polygon_overlay) supports different GUI modes, which may be defined via the `EXAMPLES_UI_MODE` CMake variable. Supported values are: +Some examples (e.g., fractal, seismic, tachyon, polygon_overlay) support different GUI modes, which may be defined via the `EXAMPLES_UI_MODE` CMake variable. +Supported values are: - Cross-platform: - `con` - Console mode (Default). -- Windows: +- Windows* OS: - `gdi` - `GDI+` based implementation. - - `d2d` - `Direct 2D` based implementation. May offer superior performance, but can only be used if the Microsoft* DirectX* SDK is installed on your system(`DXSDK_DIR` should be defined). -- Linux: + - `d2d` - `Direct 2D` based implementation. May offer superior performance but can only be used if the Microsoft* DirectX* SDK is installed on your system(`DXSDK_DIR` should be defined). +- Linux* OS: - `x` - `X11` based implementation. Also `libXext` may be required to display the output correctly. -- MacOS: +- macOS*: - `mac` - `OpenGL` based implementation. Also requires the `Foundation` and `Cocoa` libraries availability. diff --git a/include/oneapi/tbb/concurrent_hash_map.h b/include/oneapi/tbb/concurrent_hash_map.h index 87881f5310..b30033742c 100644 --- a/include/oneapi/tbb/concurrent_hash_map.h +++ b/include/oneapi/tbb/concurrent_hash_map.h @@ -147,9 +147,9 @@ class hash_map_base { } template - void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) { + void init_buckets_impl( segment_ptr_type ptr, size_type sz, const Args&... 
args ) { for (size_type i = 0; i < sz; ++i) { - bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward(args)...); + bucket_allocator_traits::construct(my_allocator, ptr + i, args...); } } @@ -292,7 +292,7 @@ class hash_map_base { if( sz >= mask ) { // TODO: add custom load_factor segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); - static const segment_ptr_type is_allocating = segment_ptr_type(2);; + static const segment_ptr_type is_allocating = segment_ptr_type(2); segment_ptr_type disabled = nullptr; if (!(my_table[new_seg].load(std::memory_order_acquire)) && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) @@ -443,9 +443,11 @@ class hash_map_iterator { if( k&(k-2) ) // not the beginning of a segment ++my_bucket; else my_bucket = my_map->get_bucket( k ); - my_node = static_cast( my_bucket->node_list.load(std::memory_order_relaxed) ); - if( map_base::is_valid(my_node) ) { - my_index = k; return; + node_base *n = my_bucket->node_list.load(std::memory_order_relaxed); + if( map_base::is_valid(n) ) { + my_node = static_cast(n); + my_index = k; + return; } ++k; } diff --git a/include/oneapi/tbb/concurrent_lru_cache.h b/include/oneapi/tbb/concurrent_lru_cache.h index 29ee3bd9d2..83d0576eae 100644 --- a/include/oneapi/tbb/concurrent_lru_cache.h +++ b/include/oneapi/tbb/concurrent_lru_cache.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -140,6 +140,11 @@ class concurrent_lru_cache : no_assign { if (! 
--(map_it->second.my_ref_counter)) { // if the LRU history is full, evict the oldest items to get space if (my_history_list.size() >= my_history_list_capacity) { + if (my_history_list_capacity == 0) { + // Since LRU history capacity is zero, there is no need to keep the element in history + my_storage_map.erase(map_it); + return; + } std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity; for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) { @@ -320,13 +325,16 @@ struct concurrent_lru_cache::retrieve_aggregator_o public: retrieve_aggregator_operation(key_type key) : aggregator_operation(aggregator_operation::op_type::retrieve), - my_key(key), my_is_new_value_needed(false) {} + my_key(key), my_map_record_ptr(nullptr), my_is_new_value_needed(false) {} void handle(lru_cache_type& lru_cache_ref) { my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed); } - storage_map_reference_type result() { return *my_map_record_ptr; } + storage_map_reference_type result() { + __TBB_ASSERT(my_map_record_ptr, "Attempt to call result() before calling handle()"); + return *my_map_record_ptr; + } bool is_new_value_needed() { return my_is_new_value_needed; } }; diff --git a/include/oneapi/tbb/concurrent_queue.h b/include/oneapi/tbb/concurrent_queue.h index 94bff472cf..24659715b1 100644 --- a/include/oneapi/tbb/concurrent_queue.h +++ b/include/oneapi/tbb/concurrent_queue.h @@ -28,6 +28,24 @@ namespace tbb { namespace detail { namespace d2 { +template +std::pair internal_try_pop_impl(void* dst, QueueRep& queue, Allocator& alloc ) { + ticket_type ticket{}; + do { + // Basically, we need to read `head_counter` before `tail_counter`. 
To achieve it we build happens-before on `head_counter` + ticket = queue.head_counter.load(std::memory_order_acquire); + do { + if (static_cast(queue.tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty + // Queue is empty + return { false, ticket }; + } + // Queue had item with ticket k when we looked. Attempt to get that item. + // Another thread snatched the item, retry. + } while (!queue.head_counter.compare_exchange_strong(ticket, ticket + 1)); + } while (!queue.choose(ticket).pop(dst, ticket, queue, alloc)); + return { true, ticket }; +} + // A high-performance thread-safe non-blocking concurrent queue. // Multiple threads may each push and pop concurrently. // Assignment construction is not allowed. @@ -178,20 +196,7 @@ class concurrent_queue { } bool internal_try_pop( void* dst ) { - ticket_type k; - do { - k = my_queue_representation->head_counter.load(std::memory_order_relaxed); - do { - if (static_cast(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - k) <= 0) { - // Queue is empty - return false; - } - - // Queue had item with ticket k when we looked. Attempt to get that item. - // Another thread snatched the item, retry. - } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); - } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation, my_allocator)); - return true; + return internal_try_pop_impl(dst, *my_queue_representation, my_allocator).first; } template @@ -505,21 +510,14 @@ class concurrent_bounded_queue { } bool internal_pop_if_present( void* dst ) { - ticket_type ticket; - do { - ticket = my_queue_representation->head_counter.load(std::memory_order_relaxed); - do { - if (static_cast(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty - // Queue is empty - return false; - } - // Queue had item with ticket k when we looked. Attempt to get that item. 
- // Another thread snatched the item, retry. - } while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); - } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation, my_allocator)); + bool present{}; + ticket_type ticket{}; + std::tie(present, ticket) = internal_try_pop_impl(dst, *my_queue_representation, my_allocator); - r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); - return true; + if (present) { + r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); + } + return present; } void internal_abort() { diff --git a/include/oneapi/tbb/concurrent_vector.h b/include/oneapi/tbb/concurrent_vector.h index 51c187f8b8..2a2cb1e4bf 100644 --- a/include/oneapi/tbb/concurrent_vector.h +++ b/include/oneapi/tbb/concurrent_vector.h @@ -1031,7 +1031,7 @@ class concurrent_vector this->delete_segment(seg_idx - 1); } } - if (!k) this->my_first_block.store(0, std::memory_order_relaxed);; + if (!k) this->my_first_block.store(0, std::memory_order_relaxed); } } diff --git a/include/oneapi/tbb/detail/_aggregator.h b/include/oneapi/tbb/detail/_aggregator.h index a20b637524..2e3e5cc3d6 100644 --- a/include/oneapi/tbb/detail/_aggregator.h +++ b/include/oneapi/tbb/detail/_aggregator.h @@ -155,14 +155,17 @@ class aggregator : public aggregator_generic { // template friend class aggregating_functor; template class aggregating_functor { - AggregatingClass* my_object; + AggregatingClass* my_object{nullptr}; public: aggregating_functor() = default; aggregating_functor( AggregatingClass* object ) : my_object(object) { __TBB_ASSERT(my_object, nullptr); } - void operator()( OperationList* op_list ) { my_object->handle_operations(op_list); } + void operator()( OperationList* op_list ) { + __TBB_ASSERT(my_object, nullptr); + my_object->handle_operations(op_list); + } }; // class aggregating_functor diff --git a/include/oneapi/tbb/detail/_concurrent_queue_base.h 
b/include/oneapi/tbb/detail/_concurrent_queue_base.h index a418e61178..ee628e1e89 100644 --- a/include/oneapi/tbb/detail/_concurrent_queue_base.h +++ b/include/oneapi/tbb/detail/_concurrent_queue_base.h @@ -123,7 +123,7 @@ class micro_queue { page_allocator_traits::construct(page_allocator, p); } - if (tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); + spin_wait_until_my_turn(tail_counter, k, base); d1::call_itt_notify(d1::acquired, &tail_counter); if (p) { @@ -134,9 +134,9 @@ class micro_queue { } else { head_page.store(p, std::memory_order_relaxed); } - tail_page.store(p, std::memory_order_release); + tail_page.store(p, std::memory_order_relaxed); } else { - p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? + p = tail_page.load(std::memory_order_relaxed); } return index; } @@ -179,7 +179,7 @@ class micro_queue { d1::call_itt_notify(d1::acquired, &head_counter); spin_wait_while_eq(tail_counter, k); d1::call_itt_notify(d1::acquired, &tail_counter); - padded_page *p = head_page.load(std::memory_order_acquire); + padded_page *p = head_page.load(std::memory_order_relaxed); __TBB_ASSERT( p, nullptr ); size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); bool success = false; @@ -338,8 +338,8 @@ class micro_queue { } void spin_wait_until_my_turn( std::atomic& counter, ticket_type k, queue_rep_type& rb ) const { - for (atomic_backoff b(true);; b.pause()) { - ticket_type c = counter; + for (atomic_backoff b{};; b.pause()) { + ticket_type c = counter.load(std::memory_order_acquire); if (c == k) return; else if (c & 1) { ++rb.n_invalid_entries; @@ -380,9 +380,9 @@ class micro_queue_pop_finalizer { if( is_valid_page(p) ) { spin_mutex::scoped_lock lock( my_queue.page_mutex ); padded_page* q = p->next; - my_queue.head_page.store(q, std::memory_order_release); + my_queue.head_page.store(q, std::memory_order_relaxed); if( !is_valid_page(q) ) { - 
my_queue.tail_page.store(nullptr, std::memory_order_release); + my_queue.tail_page.store(nullptr, std::memory_order_relaxed); } } my_queue.head_counter.store(my_ticket_type, std::memory_order_release); diff --git a/include/oneapi/tbb/detail/_concurrent_skip_list.h b/include/oneapi/tbb/detail/_concurrent_skip_list.h index 89d2ee3c91..eaaaac81ca 100644 --- a/include/oneapi/tbb/detail/_concurrent_skip_list.h +++ b/include/oneapi/tbb/detail/_concurrent_skip_list.h @@ -1068,6 +1068,7 @@ class concurrent_skip_list { __TBB_ASSERT(!handle.empty(), "Extracted handle in merge is empty"); if (!insert(std::move(handle)).second) { + __TBB_ASSERT(!handle.empty(), "Handle should not be empty if insert fails"); //If the insertion fails - return the node into source source.insert(std::move(handle)); } diff --git a/include/oneapi/tbb/detail/_concurrent_unordered_base.h b/include/oneapi/tbb/detail/_concurrent_unordered_base.h index c684f4f736..4cbf91ad84 100644 --- a/include/oneapi/tbb/detail/_concurrent_unordered_base.h +++ b/include/oneapi/tbb/detail/_concurrent_unordered_base.h @@ -917,7 +917,7 @@ class concurrent_unordered_base { node_allocator_traits::deallocate(dummy_node_allocator, node, 1); } else { // GCC 11.1 issues a warning here that incorrect destructor might be called for dummy_nodes - #if (__TBB_GCC_VERSION >= 110100 && __TBB_GCC_VERSION < 120000 ) && !__clang__ && !__INTEL_COMPILER + #if (__TBB_GCC_VERSION >= 110100 && __TBB_GCC_VERSION < 130000 ) && !__clang__ && !__INTEL_COMPILER volatile #endif value_node_ptr val_node = static_cast(node); diff --git a/include/oneapi/tbb/detail/_config.h b/include/oneapi/tbb/detail/_config.h index 9bc2ed502f..1d0b98778c 100644 --- a/include/oneapi/tbb/detail/_config.h +++ b/include/oneapi/tbb/detail/_config.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the 
License. @@ -374,16 +374,12 @@ #define __TBB_ARENA_BINDING 1 #endif -#if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING - #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 -#endif - #ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY #define __TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 #endif #if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ - (_WIN32 || _WIN64 || __APPLE__ || (__unix__ && !__ANDROID__)) + (_WIN32 || _WIN64 || __APPLE__ || (defined(__unix__) && !__ANDROID__)) #define __TBB_SURVIVE_THREAD_SWITCH 1 #endif /* __TBB_SURVIVE_THREAD_SWITCH */ diff --git a/include/oneapi/tbb/detail/_flow_graph_join_impl.h b/include/oneapi/tbb/detail/_flow_graph_join_impl.h index 1273e1f9d5..5515421ede 100644 --- a/include/oneapi/tbb/detail/_flow_graph_join_impl.h +++ b/include/oneapi/tbb/detail/_flow_graph_join_impl.h @@ -384,7 +384,7 @@ graph_task* bypass_t; // constructor for value parameter queueing_port_operation(const T& e, op_type t) : - type(char(t)), my_val(e) + type(char(t)), my_val(e), my_arg(nullptr) , bypass_t(nullptr) {} // constructor for pointer parameter @@ -393,7 +393,7 @@ , bypass_t(nullptr) {} // constructor with no parameter - queueing_port_operation(op_type t) : type(char(t)) + queueing_port_operation(op_type t) : type(char(t)), my_arg(nullptr) , bypass_t(nullptr) {} }; @@ -422,6 +422,7 @@ break; case get__item: if(!this->buffer_empty()) { + __TBB_ASSERT(current->my_arg, nullptr); *(current->my_arg) = this->front(); current->status.store( SUCCEEDED, std::memory_order_release); } @@ -547,12 +548,12 @@ input_type *my_arg; // constructor for value parameter key_matching_port_operation(const input_type& e, op_type t) : - type(char(t)), my_val(e) {} + type(char(t)), my_val(e), my_arg(nullptr) {} // constructor for pointer parameter key_matching_port_operation(const input_type* p, op_type t) : type(char(t)), my_arg(const_cast(p)) {} // constructor with no parameter - key_matching_port_operation(op_type t) : type(char(t)) {} 
+ key_matching_port_operation(op_type t) : type(char(t)), my_arg(nullptr) {} }; typedef aggregating_functor handler_type; @@ -573,6 +574,7 @@ break; case get__item: // use current_key from FE for item + __TBB_ASSERT(current->my_arg, nullptr); if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); } @@ -667,12 +669,12 @@ template class join_node_FE : public reserving_forwarding_base { - public: + private: static const int N = std::tuple_size::value; typedef OutputTuple output_type; typedef InputTuple input_type; typedef join_node_base base_node_type; // for forwarding - + public: join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { ports_with_no_inputs = N; join_helper::set_join_node_pointer(my_inputs, this); @@ -1212,10 +1214,10 @@ // using tuple_element. The class PT is the port type (reserving_port, queueing_port, key_matching_port) // and should match the typename. - template class PT, typename OutputTuple, typename JP> - class unfolded_join_node : public join_base::type { + template class PT, typename OutputTuple, typename JP> + class unfolded_join_node : public join_base::type { public: - typedef typename wrap_tuple_elements::type input_ports_type; + typedef typename wrap_tuple_elements::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base base_type; diff --git a/include/oneapi/tbb/detail/_flow_graph_node_impl.h b/include/oneapi/tbb/detail/_flow_graph_node_impl.h index 5bc361a4d2..9e2f9adfcc 100644 --- a/include/oneapi/tbb/detail/_flow_graph_node_impl.h +++ b/include/oneapi/tbb/detail/_flow_graph_node_impl.h @@ -159,8 +159,8 @@ class function_input_base : public receiver, no_assign { }; graph_task* bypass_t; operation_type(const input_type& e, op_type t) : - type(char(t)), elem(const_cast(&e)) {} - operation_type(op_type t) : type(char(t)), r(nullptr) {} + type(char(t)), elem(const_cast(&e)), bypass_t(nullptr) {} + 
operation_type(op_type t) : type(char(t)), r(nullptr), bypass_t(nullptr) {} }; bool forwarder_busy; diff --git a/include/oneapi/tbb/detail/_segment_table.h b/include/oneapi/tbb/detail/_segment_table.h index 2f7f83670a..1a31d8a17d 100644 --- a/include/oneapi/tbb/detail/_segment_table.h +++ b/include/oneapi/tbb/detail/_segment_table.h @@ -60,17 +60,19 @@ class segment_table { static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8; public: segment_table( const allocator_type& alloc = allocator_type() ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + : my_segment_table_allocator(alloc), my_segment_table(nullptr) , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); zero_table(my_embedded_table, pointers_per_embedded_table); } segment_table( const segment_table& other ) : my_segment_table_allocator(segment_table_allocator_traits:: select_on_container_copy_construction(other.my_segment_table_allocator)) - , my_segment_table(my_embedded_table), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + , my_segment_table(nullptr), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); zero_table(my_embedded_table, pointers_per_embedded_table); try_call( [&] { internal_transfer(other, copy_segment_body_type{*this}); @@ -80,9 +82,10 @@ class segment_table { } segment_table( const segment_table& other, const allocator_type& alloc ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + : my_segment_table_allocator(alloc), my_segment_table(nullptr) , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); zero_table(my_embedded_table, pointers_per_embedded_table); try_call( [&] { internal_transfer(other, copy_segment_body_type{*this}); @@ -92,17 
+95,19 @@ class segment_table { } segment_table( segment_table&& other ) - : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(my_embedded_table) + : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(nullptr) , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); zero_table(my_embedded_table, pointers_per_embedded_table); internal_move(std::move(other)); } segment_table( segment_table&& other, const allocator_type& alloc ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table), my_first_block{} + : my_segment_table_allocator(alloc), my_segment_table(nullptr), my_first_block{} , my_size{}, my_segment_table_allocation_failed{} { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); zero_table(my_embedded_table, pointers_per_embedded_table); using is_equal_type = typename segment_table_allocator_traits::is_always_equal; internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type()); diff --git a/include/oneapi/tbb/flow_graph.h b/include/oneapi/tbb/flow_graph.h index 225c1a57db..60016d9369 100644 --- a/include/oneapi/tbb/flow_graph.h +++ b/include/oneapi/tbb/flow_graph.h @@ -1183,8 +1183,9 @@ class buffer_node buffer_operation(const T& e, op_type t) : type(char(t)) , elem(const_cast(&e)) , ltask(nullptr) + , r(nullptr) {} - buffer_operation(op_type t) : type(char(t)), ltask(nullptr) {} + buffer_operation(op_type t) : type(char(t)), elem(nullptr), ltask(nullptr), r(nullptr) {} }; bool forwarder_busy; @@ -1271,12 +1272,14 @@ class buffer_node //! Register successor virtual void internal_reg_succ(buffer_operation *op) { + __TBB_ASSERT(op->r, nullptr); my_successors.register_successor(*(op->r)); op->status.store(SUCCEEDED, std::memory_order_release); } //! 
Remove successor virtual void internal_rem_succ(buffer_operation *op) { + __TBB_ASSERT(op->r, nullptr); my_successors.remove_successor(*(op->r)); op->status.store(SUCCEEDED, std::memory_order_release); } @@ -1330,12 +1333,14 @@ class buffer_node } virtual bool internal_push(buffer_operation *op) { + __TBB_ASSERT(op->elem, nullptr); this->push_back(*(op->elem)); op->status.store(SUCCEEDED, std::memory_order_release); return true; } virtual void internal_pop(buffer_operation *op) { + __TBB_ASSERT(op->elem, nullptr); if(this->pop_back(*(op->elem))) { op->status.store(SUCCEEDED, std::memory_order_release); } @@ -1345,6 +1350,7 @@ class buffer_node } virtual void internal_reserve(buffer_operation *op) { + __TBB_ASSERT(op->elem, nullptr); if(this->reserve_front(*(op->elem))) { op->status.store(SUCCEEDED, std::memory_order_release); } diff --git a/include/oneapi/tbb/info.h b/include/oneapi/tbb/info.h index 5a68960a84..dfcfcc031d 100644 --- a/include/oneapi/tbb/info.h +++ b/include/oneapi/tbb/info.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2019-2021 Intel Corporation + Copyright (c) 2019-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -49,7 +49,6 @@ struct constraints { max_concurrency = maximal_concurrency; return *this; } -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT constraints& set_core_type(core_type_id id) { core_type = id; return *this; @@ -58,14 +57,11 @@ struct constraints { max_threads_per_core = threads_number; return *this; } -#endif numa_node_id numa_id = -1; int max_concurrency = -1; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT core_type_id core_type = -1; int max_threads_per_core = -1; -#endif }; } // namespace d1 @@ -96,7 +92,6 @@ inline int default_concurrency(numa_node_id id = -1) { return r1::numa_default_concurrency(id); } -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT inline std::vector core_types() { std::vector core_type_indexes(r1::core_type_count()); r1::fill_core_type_indices(core_type_indexes.data()); @@ -107,22 +102,17 @@ inline int default_concurrency(constraints c) { if (c.max_concurrency > 0) { return c.max_concurrency; } return r1::constraints_default_concurrency(c); } -#endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::numa_node_id; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT using detail::d1::core_type_id; -#endif namespace info { using detail::d1::numa_nodes; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT using detail::d1::core_types; -#endif using detail::d1::default_concurrency; } // namespace info diff --git a/include/oneapi/tbb/parallel_for.h b/include/oneapi/tbb/parallel_for.h index d3c8fdd849..a9e9a1c2fe 100644 --- a/include/oneapi/tbb/parallel_for.h +++ b/include/oneapi/tbb/parallel_for.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -79,6 +79,7 @@ struct start_for : public task { start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : my_range(range), my_body(body), + my_parent(nullptr), my_partition(partitioner), my_allocator(alloc) {} //! Splitting constructor used to generate children. @@ -86,6 +87,7 @@ struct start_for : public task { start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(parent_.my_body), + my_parent(nullptr), my_partition(parent_.my_partition, split_obj), my_allocator(alloc) {} //! Construct right child from the given range as response to the demand. @@ -93,6 +95,7 @@ struct start_for : public task { start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : my_range(r), my_body(parent_.my_body), + my_parent(nullptr), my_partition(parent_.my_partition, split()), my_allocator(alloc) { diff --git a/include/oneapi/tbb/parallel_for_each.h b/include/oneapi/tbb/parallel_for_each.h index 8eb9e2069e..795e7d03f1 100644 --- a/include/oneapi/tbb/parallel_for_each.h +++ b/include/oneapi/tbb/parallel_for_each.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -95,7 +95,7 @@ struct parallel_for_each_operator_selector { body(std::forward(item)); #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (push) + #pragma warning (pop) #endif } @@ -112,7 +112,7 @@ struct parallel_for_each_operator_selector { body(std::forward(item), *feeder); #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (push) + #pragma warning (pop) #endif } }; @@ -306,7 +306,9 @@ struct input_block_handling_task : public task { ~input_block_handling_task() { for(std::size_t counter = 0; counter < max_block_size; ++counter) { (task_pool.begin() + counter)->~iteration_task(); - (block_iteration_space.begin() + counter)->~Item(); + if (counter < my_size) { + (block_iteration_space.begin() + counter)->~Item(); + } } } diff --git a/include/oneapi/tbb/parallel_reduce.h b/include/oneapi/tbb/parallel_reduce.h index 0dfd486abd..a1bc8f3dab 100644 --- a/include/oneapi/tbb/parallel_reduce.h +++ b/include/oneapi/tbb/parallel_reduce.h @@ -104,6 +104,7 @@ struct start_reduce : public task { start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : my_range(range), my_body(&body), + my_parent(nullptr), my_partition(partitioner), my_allocator(alloc), is_right_child(false) {} @@ -112,6 +113,7 @@ struct start_reduce : public task { start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(parent_.my_body), + my_parent(nullptr), my_partition(parent_.my_partition, split_obj), my_allocator(alloc), is_right_child(true) @@ -123,6 +125,7 @@ struct start_reduce : public task { start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : my_range(r), my_body(parent_.my_body), + my_parent(nullptr), my_partition(parent_.my_partition, split()), my_allocator(alloc), is_right_child(true) @@ -201,6 +204,7 @@ task* 
start_reduce::execute(execution_data& ed) { // The acquire barrier synchronizes the data pointed with my_body if the left // task has already finished. + __TBB_ASSERT(my_parent, nullptr); if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { tree_node_type* parent_ptr = static_cast(my_parent); my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); @@ -261,6 +265,7 @@ struct start_deterministic_reduce : public task { start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : my_range(range), my_body(body), + my_parent(nullptr), my_partition(partitioner), my_allocator(alloc) {} //! Splitting constructor used to generate children. @@ -269,6 +274,7 @@ struct start_deterministic_reduce : public task { small_object_allocator& alloc ) : my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(body), + my_parent(nullptr), my_partition(parent_.my_partition, split_obj), my_allocator(alloc) {} static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { diff --git a/include/oneapi/tbb/partitioner.h b/include/oneapi/tbb/partitioner.h index 5f3c0cc512..cbbf5e9894 100644 --- a/include/oneapi/tbb/partitioner.h +++ b/include/oneapi/tbb/partitioner.h @@ -160,6 +160,7 @@ struct tree_node : public node { template void fold_tree(node* n, const execution_data& ed) { for (;;) { + __TBB_ASSERT(n, nullptr); __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive."); call_itt_task_notify(releasing, n); if (--n->m_ref_count > 0) { diff --git a/include/oneapi/tbb/task_arena.h b/include/oneapi/tbb/task_arena.h index 69c8b94765..0de49aef07 100644 --- a/include/oneapi/tbb/task_arena.h +++ b/include/oneapi/tbb/task_arena.h @@ -187,13 +187,8 @@ class task_arena_base { , my_num_reserved_slots(reserved_for_masters) , my_priority(a_priority) , my_numa_id(constraints_.numa_id) -#if 
__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT , my_core_type(constraints_.core_type) , my_max_threads_per_core(constraints_.max_threads_per_core) -#else - , my_core_type(automatic) - , my_max_threads_per_core(automatic) -#endif {} #endif /*__TBB_ARENA_BINDING*/ public: @@ -280,10 +275,8 @@ class task_arena : public task_arena_base { constraints{} .set_numa_id(s.my_numa_id) .set_max_concurrency(s.my_max_concurrency) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT .set_core_type(s.my_core_type) .set_max_threads_per_core(s.my_max_threads_per_core) -#endif , s.my_num_reserved_slots, s.my_priority) {} #else @@ -337,10 +330,8 @@ class task_arena : public task_arena_base { if( !is_active() ) { my_numa_id = constraints_.numa_id; my_max_concurrency = constraints_.max_concurrency; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT my_core_type = constraints_.core_type; my_max_threads_per_core = constraints_.max_threads_per_core; -#endif my_num_reserved_slots = reserved_for_masters; my_priority = a_priority; r1::initialize(*this); diff --git a/include/oneapi/tbb/version.h b/include/oneapi/tbb/version.h index 22e67dad50..1396b85b8d 100644 --- a/include/oneapi/tbb/version.h +++ b/include/oneapi/tbb/version.h @@ -29,7 +29,7 @@ // Product version #define TBB_VERSION_MAJOR 2021 // Update version -#define TBB_VERSION_MINOR 8 +#define TBB_VERSION_MINOR 9 // "Patch" version for custom releases #define TBB_VERSION_PATCH 0 // Suffix string @@ -40,7 +40,7 @@ // OneAPI oneTBB specification version #define ONETBB_SPEC_VERSION "1.0" // Full interface version -#define TBB_INTERFACE_VERSION 12080 +#define TBB_INTERFACE_VERSION 12090 // Major interface version #define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) // Minor interface version diff --git a/python/rml/ipc_server.cpp b/python/rml/ipc_server.cpp index 38cf9027ff..fff5e9edd9 100644 --- a/python/rml/ipc_server.cpp +++ b/python/rml/ipc_server.cpp @@ -519,7 +519,7 @@ void 
ipc_worker::release_handle(thread_handle handle, bool join) { } void ipc_worker::start_shutdown(bool join) { - state_t s = my_state.load(std::memory_order_relaxed);; + state_t s = my_state.load(std::memory_order_relaxed); do { __TBB_ASSERT( s!=st_quit, nullptr ); @@ -538,7 +538,7 @@ void ipc_worker::start_shutdown(bool join) { } void ipc_worker::start_stopping(bool join) { - state_t s = my_state.load(std::memory_order_relaxed);; + state_t s = my_state.load(std::memory_order_relaxed); while( !my_state.compare_exchange_strong( s, st_quit ) ) {}; if( s==st_normal || s==st_starting ) { diff --git a/src/tbb/CMakeLists.txt b/src/tbb/CMakeLists.txt index bff2a4fde7..996bf6c186 100644 --- a/src/tbb/CMakeLists.txt +++ b/src/tbb/CMakeLists.txt @@ -93,8 +93,8 @@ tbb_handle_ipo(tbb) if (TBB_DEF_FILE_PREFIX) # If there's no prefix, assume we're using export directives set_target_properties(tbb PROPERTIES - LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbb.def - LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbb.def + LINK_FLAGS "${TBB_LINK_DEF_FILE_FLAG}\"${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbb.def\"" + LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbb.def" ) endif() diff --git a/src/tbb/arena.cpp b/src/tbb/arena.cpp index 2161ed5dc2..e79f689b82 100644 --- a/src/tbb/arena.cpp +++ b/src/tbb/arena.cpp @@ -436,17 +436,11 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { (void)governor::get_thread_data(); if (ta.my_max_concurrency < 1) { #if __TBB_ARENA_BINDING - -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT d1::constraints arena_constraints = d1::constraints{} .set_core_type(ta.core_type()) .set_max_threads_per_core(ta.max_threads_per_core()) .set_numa_id(ta.my_numa_id); ta.my_max_concurrency = (int)default_concurrency(arena_constraints); -#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - ta.my_max_concurrency = 
(int)default_concurrency(ta.my_numa_id); -#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - #else /*!__TBB_ARENA_BINDING*/ ta.my_max_concurrency = (int)governor::default_num_threads(); #endif /*!__TBB_ARENA_BINDING*/ @@ -736,15 +730,11 @@ int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { #if __TBB_ARENA_BINDING if (ta) { -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT d1::constraints arena_constraints = d1::constraints{} .set_numa_id(ta->my_numa_id) .set_core_type(ta->core_type()) .set_max_threads_per_core(ta->max_threads_per_core()); return (int)default_concurrency(arena_constraints); -#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - return (int)default_concurrency(ta->my_numa_id); -#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ } #endif /*!__TBB_ARENA_BINDING*/ diff --git a/src/tbb/arena_slot.h b/src/tbb/arena_slot.h index d9a70cfaf8..cdd91902d1 100644 --- a/src/tbb/arena_slot.h +++ b/src/tbb/arena_slot.h @@ -221,7 +221,7 @@ class arena_slot : private arena_slot_shared_state, private arena_slot_private_s } acquire_task_pool(); std::size_t H = head.load(std::memory_order_relaxed); // mirror - d1::task** new_task_pool = task_pool_ptr;; + d1::task** new_task_pool = task_pool_ptr; __TBB_ASSERT( my_task_pool_size >= min_task_pool_size, nullptr); // Count not skipped tasks. Consider using std::count_if. for ( std::size_t i = H; i < T; ++i ) diff --git a/src/tbb/itt_notify.h b/src/tbb/itt_notify.h index 5fc9d5424f..48ddc5caec 100644 --- a/src/tbb/itt_notify.h +++ b/src/tbb/itt_notify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -34,6 +34,7 @@ #include "tools_api/ittnotify.h" #include "tools_api/legacy/ittnotify.h" extern "C" void __itt_fini_ittlib(void); +extern "C" void __itt_release_resources(void); #if _WIN32||_WIN64 #undef _T @@ -75,6 +76,7 @@ extern const tchar #define ITT_NOTIFY(name,obj) __itt_##name(const_cast(static_cast(obj))) #define ITT_THREAD_SET_NAME(name) __itt_thread_set_name(name) #define ITT_FINI_ITTLIB() __itt_fini_ittlib() +#define ITT_RELEASE_RESOURCES() __itt_release_resources() #define ITT_SYNC_CREATE(obj, type, name) __itt_sync_create((void*)(obj), type, name, 2) #define ITT_STACK_CREATE(obj) obj = __itt_stack_caller_create() #define ITT_STACK_DESTROY(obj) (obj!=nullptr) ? __itt_stack_caller_destroy(static_cast<__itt_caller>(obj)) : ((void)0) @@ -94,6 +96,7 @@ extern const tchar #define ITT_NOTIFY(name,obj) ((void)0) #define ITT_THREAD_SET_NAME(name) ((void)0) #define ITT_FINI_ITTLIB() ((void)0) +#define ITT_RELEASE_RESOURCES() ((void)0) #define ITT_SYNC_CREATE(obj, type, name) ((void)0) #define ITT_STACK_CREATE(obj) ((void)0) #define ITT_STACK_DESTROY(obj) ((void)0) diff --git a/src/tbb/main.cpp b/src/tbb/main.cpp index d86c3b696b..8a1dc89320 100644 --- a/src/tbb/main.cpp +++ b/src/tbb/main.cpp @@ -100,6 +100,7 @@ void __TBB_InitOnce::remove_ref() { if( k==0 ) { governor::release_resources(); ITT_FINI_ITTLIB(); + ITT_RELEASE_RESOURCES(); } } diff --git a/src/tbb/rml_thread_monitor.h b/src/tbb/rml_thread_monitor.h index 13b556380f..57e9c30b07 100644 --- a/src/tbb/rml_thread_monitor.h +++ b/src/tbb/rml_thread_monitor.h @@ -31,6 +31,7 @@ #include #include #include +#include #else #error Unsupported platform #endif @@ -191,8 +192,25 @@ inline thread_monitor::handle_type thread_monitor::launch( void* (*thread_routin check(pthread_attr_init( &s ), "pthread_attr_init has failed"); if( stack_size>0 ) check(pthread_attr_setstacksize( &s, stack_size ), "pthread_attr_setstack_size has failed" ); + + // pthread_create(2) can spuriously fail with EAGAIN. 
We retry + // max_num_tries times with progressively longer wait times. pthread_t handle; - check( pthread_create( &handle, &s, thread_routine, arg ), "pthread_create has failed" ); + const int max_num_tries = 20; + int error = EAGAIN; + + for (int i = 0; i < max_num_tries && error == EAGAIN; i++) { + if (i != 0) { + // Wait i milliseconds + struct timespec ts = {0, i * 1000 * 1000}; + nanosleep(&ts, NULL); + } + error = pthread_create(&handle, &s, thread_routine, arg); + } + + if (error) + handle_perror(error, "pthread_create has failed"); + check( pthread_attr_destroy( &s ), "pthread_attr_destroy has failed" ); return handle; } diff --git a/src/tbb/semaphore.h b/src/tbb/semaphore.h index 4835f839db..9d27f3ac98 100644 --- a/src/tbb/semaphore.h +++ b/src/tbb/semaphore.h @@ -22,10 +22,7 @@ #if _WIN32||_WIN64 #include #elif __APPLE__ -#include -#include -#include -#include +#include #else #include #ifdef TBB_USE_DEBUG @@ -150,28 +147,18 @@ class semaphore : no_copy { class semaphore : no_copy { public: //! ctor - semaphore(int start_cnt_ = 0) : sem(start_cnt_) { init_semaphore(start_cnt_); } + semaphore(int start_cnt_ = 0) { my_sem = dispatch_semaphore_create(start_cnt_); } //! dtor - ~semaphore() { - kern_return_t ret = semaphore_destroy( mach_task_self(), sem ); - __TBB_ASSERT_EX( ret==err_none, nullptr); - } + ~semaphore() { dispatch_release(my_sem); } //! wait/acquire void P() { - int ret; - do { - ret = semaphore_wait( sem ); - } while( ret==KERN_ABORTED ); - __TBB_ASSERT( ret==KERN_SUCCESS, "semaphore_wait() failed" ); + std::intptr_t ret = dispatch_semaphore_wait(my_sem, DISPATCH_TIME_FOREVER); + __TBB_ASSERT_EX(ret == 0, "dispatch_semaphore_wait() failed"); } //! 
post/release - void V() { semaphore_signal( sem ); } + void V() { dispatch_semaphore_signal(my_sem); } private: - semaphore_t sem; - void init_semaphore(int start_cnt_) { - kern_return_t ret = semaphore_create( mach_task_self(), &sem, SYNC_POLICY_FIFO, start_cnt_ ); - __TBB_ASSERT_EX( ret==err_none, "failed to create a semaphore" ); - } + dispatch_semaphore_t my_sem; }; #else /* Linux/Unix */ typedef uint32_t sem_count_t; @@ -244,31 +231,7 @@ class binary_semaphore : no_copy { #endif /* !__TBB_USE_SRWLOCK */ #elif __APPLE__ //! binary_semaphore for concurrent monitor -class binary_semaphore : no_copy { -public: - //! ctor - binary_semaphore() : my_sem(0) { - kern_return_t ret = semaphore_create( mach_task_self(), &my_sem, SYNC_POLICY_FIFO, 0 ); - __TBB_ASSERT_EX( ret==err_none, "failed to create a semaphore" ); - } - //! dtor - ~binary_semaphore() { - kern_return_t ret = semaphore_destroy( mach_task_self(), my_sem ); - __TBB_ASSERT_EX( ret==err_none, nullptr); - } - //! wait/acquire - void P() { - int ret; - do { - ret = semaphore_wait( my_sem ); - } while( ret==KERN_ABORTED ); - __TBB_ASSERT( ret==KERN_SUCCESS, "semaphore_wait() failed" ); - } - //! post/release - void V() { semaphore_signal( my_sem ); } -private: - semaphore_t my_sem; -}; +using binary_semaphore = semaphore; #else /* Linux/Unix */ #if __TBB_USE_FUTEX diff --git a/src/tbb/task_stream.h b/src/tbb/task_stream.h index dc0b6818bc..0aaace52b4 100644 --- a/src/tbb/task_stream.h +++ b/src/tbb/task_stream.h @@ -193,10 +193,11 @@ class task_stream : public task_stream_accessor< accessor > { d1::task* pop( const lane_selector_t& next_lane ) { d1::task* popped = nullptr; unsigned lane = 0; - do { - lane = next_lane( /*out_of=*/N ); - __TBB_ASSERT( lane < N, "Incorrect lane index." 
); - } while( !empty() && !(popped = try_pop( lane )) ); + for (atomic_backoff b; !empty() && !popped; b.pause()) { + lane = next_lane( /*out_of=*/N); + __TBB_ASSERT(lane < N, "Incorrect lane index."); + popped = try_pop(lane); + } return popped; } diff --git a/src/tbb/tbb.rc b/src/tbb/tbb.rc index 4c879786ea..be3e301135 100644 --- a/src/tbb/tbb.rc +++ b/src/tbb/tbb.rc @@ -1,4 +1,4 @@ -// Copyright (c) 2005-2022 Intel Corporation +// Copyright (c) 2005-2023 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,29 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Microsoft Visual C++ generated resource script. -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NO_MFC 1 -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif - -#define APSTUDIO_READONLY_SYMBOLS ///////////////////////////////////////////////////////////////////////////// // -// Generated from the TEXTINCLUDE 2 resource. 
+// Includes // #include #include "../../include/oneapi/tbb/version.h" -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - ///////////////////////////////////////////////////////////////////////////// // Neutral resources @@ -43,13 +27,6 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL #pragma code_page(1252) #endif //_WIN32 -///////////////////////////////////////////////////////////////////////////// -// manifest integration -#ifdef TBB_MANIFEST -#include "winuser.h" -2 RT_MANIFEST tbbmanifest.exe.manifest -#endif - ///////////////////////////////////////////////////////////////////////////// // // Version @@ -95,17 +72,3 @@ BEGIN VALUE "Translation", 0x0, 1200 END END - -//#endif // Neutral resources -///////////////////////////////////////////////////////////////////////////// - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. -// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED diff --git a/src/tbb/tools_api/disable_warnings.h b/src/tbb/tools_api/disable_warnings.h index 27aa3ee0ce..977569d087 100644 --- a/src/tbb/tools_api/disable_warnings.h +++ b/src/tbb/tools_api/disable_warnings.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -16,7 +16,9 @@ #include "ittnotify_config.h" -#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER +#if ITT_PLATFORM==ITT_PLATFORM_WIN + +#if defined _MSC_VER #pragma warning (disable: 593) /* parameter "XXXX" was set but never used */ #pragma warning (disable: 344) /* typedef name has already been declared (with same type) */ @@ -24,6 +26,8 @@ #pragma warning (disable: 4127) /* conditional expression is constant */ #pragma warning (disable: 4306) /* conversion from '?' to '?' of greater size */ +#endif + #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #if defined __INTEL_COMPILER diff --git a/src/tbb/tools_api/ittnotify.h b/src/tbb/tools_api/ittnotify.h index 1eecd2faa7..e701980f6a 100644 --- a/src/tbb/tools_api/ittnotify.h +++ b/src/tbb/tools_api/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ @brief Public User API functions and types @mainpage -The Instrumentation and Tracing Technology API (ITT API) is used to +The Instrumentation and Tracing Technology API (ITT API) is used to annotate a user's program with additional information that can be used by correctness and performance tools. The user inserts calls in their program. 
Those calls generate information that is collected @@ -188,7 +188,12 @@ The same ID may not be reused for different instances, unless a previous #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ +#if defined(__MINGW32__) && !defined(__cplusplus) +#define ITT_INLINE static __inline__ __attribute__((__always_inline__,__gnu_inline__)) +#else #define ITT_INLINE static __forceinline +#endif /* __MINGW32__ */ + #define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* @@ -249,20 +254,20 @@ The same ID may not be reused for different instances, unless a previous #define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) #define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) -#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) -#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 
0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_VOID_D0(n,d) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) #ifdef ITT_STUB #undef ITT_STUB @@ -590,6 +595,18 @@ typedef enum __itt_suppress_mode { __itt_suppress_range } __itt_suppress_mode_t; +/** + * @enum __itt_collection_state + * @brief Enumerator for collection state. All non-work states have negative values. + */ +typedef enum { + __itt_collection_uninitialized = 0, /* uninitialized */ + __itt_collection_init_fail = 1, /* failed to init */ + __itt_collection_collector_absent = 2, /* non work state collector exists */ + __itt_collection_collector_exists = 3, /* work state collector exists */ + __itt_collection_init_successful = 4 /* success to init */ +} __itt_collection_state; + /** * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask */ @@ -3869,6 +3886,125 @@ ITT_STUBV(ITTAPI, void, module_unload_with_sections, (__itt_module_object* modu #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_histogram +{ + const __itt_domain* domain; /*!< Domain of the histogram*/ + const char* nameA; /*!< Name of the histogram */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + __itt_metadata_type x_type; /*!< Type of the histogram X axis */ + __itt_metadata_type y_type; /*!< Type of the histogram Y axis */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_histogram* next; +} __itt_histogram; + +#pragma 
pack(pop) +/** @endcond */ + +/** + * @brief Create a typed histogram instance with given name/domain. + * @param[in] domain The domain controlling the call. + * @param[in] name The name of the histogram. + * @param[in] x_type The type of the X axis in histogram (may be 0 to calculate batch statistics). + * @param[in] y_type The type of the Y axis in histogram. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_histogram* ITTAPI __itt_histogram_createA(const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type); +__itt_histogram* ITTAPI __itt_histogram_createW(const __itt_domain* domain, const wchar_t* name, __itt_metadata_type x_type, __itt_metadata_type y_type); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_histogram_create __itt_histogram_createW +# define __itt_histogram_create_ptr __itt_histogram_createW_ptr +#else /* UNICODE */ +# define __itt_histogram_create __itt_histogram_createA +# define __itt_histogram_create_ptr __itt_histogram_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_histogram* ITTAPI __itt_histogram_create(const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_histogram*, histogram_createA, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type)) +ITT_STUB(ITTAPI, __itt_histogram*, histogram_createW, (const __itt_domain* domain, const wchar_t* name, __itt_metadata_type x_type, __itt_metadata_type y_type)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_histogram*, histogram_create, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if 
ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_histogram_createA ITTNOTIFY_DATA(histogram_createA) +#define __itt_histogram_createA_ptr ITTNOTIFY_NAME(histogram_createA) +#define __itt_histogram_createW ITTNOTIFY_DATA(histogram_createW) +#define __itt_histogram_createW_ptr ITTNOTIFY_NAME(histogram_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_histogram_create ITTNOTIFY_DATA(histogram_create) +#define __itt_histogram_create_ptr ITTNOTIFY_NAME(histogram_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_histogram_createA(domain, name, x_type, y_type) (__itt_histogram*)0 +#define __itt_histogram_createA_ptr 0 +#define __itt_histogram_createW(domain, name, x_type, y_type) (__itt_histogram*)0 +#define __itt_histogram_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_histogram_create(domain, name, x_type, y_type) (__itt_histogram*)0 +#define __itt_histogram_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_histogram_createA_ptr 0 +#define __itt_histogram_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_histogram_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Submit statistics for a histogram instance. + * @param[in] histogram Pointer to the histogram instance to which the histogram statistic is to be dumped. + * @param[in] length The number of elements in dumped axis data array. + * @param[in] x_data The X axis dumped data itself (may be NULL to calculate batch statistics). + * @param[in] y_data The Y axis dumped data itself. 
+*/ +void ITTAPI __itt_histogram_submit(__itt_histogram* histogram, size_t length, void* x_data, void* y_data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, histogram_submit, (__itt_histogram* histogram, size_t length, void* x_data, void* y_data)) +#define __itt_histogram_submit ITTNOTIFY_VOID(histogram_submit) +#define __itt_histogram_submit_ptr ITTNOTIFY_NAME(histogram_submit) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_histogram_submit(histogram, length, x_data, y_data) +#define __itt_histogram_submit_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_histogram_submit_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ + +/** +* @brief function allows to obtain the current collection state at the moment +* @return collection state as a enum __itt_collection_state +*/ +__itt_collection_state __itt_get_collection_state(void); + +/** +* @brief function releases resources allocated by ITT API static part +* this API should be called from the library destructor +* @return void +*/ +void __itt_release_resources(void); +/** @endcond */ + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/src/tbb/tools_api/ittnotify_config.h b/src/tbb/tools_api/ittnotify_config.h index 5e7c0cdf5e..0f5d80f659 100644 --- a/src/tbb/tools_api/ittnotify_config.h +++ b/src/tbb/tools_api/ittnotify_config.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -121,7 +121,12 @@ #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ +#if defined(__MINGW32__) && !defined(__cplusplus) +#define ITT_INLINE static __inline__ __attribute__((__always_inline__,__gnu_inline__)) +#else #define ITT_INLINE static __forceinline +#endif /* __MINGW32__ */ + #define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* @@ -167,6 +172,18 @@ # define ITT_ARCH_LOONGARCH64 7 #endif /* ITT_ARCH_LOONGARCH64 */ +#ifndef ITT_ARCH_S390X +# define ITT_ARCH_S390X 8 +#endif /* ITT_ARCH_S390X */ + +#ifndef ITT_ARCH_HPPA +# define ITT_ARCH_HPPA 9 +#endif /* ITT_ARCH_HPPA */ + +#ifndef ITT_ARCH_RISCV64 +# define ITT_ARCH_RISCV64 10 +#endif /* ITT_ARCH_RISCV64 */ + #ifndef ITT_ARCH # if defined _M_IX86 || defined __i386__ # define ITT_ARCH ITT_ARCH_IA32 @@ -182,7 +199,14 @@ # define ITT_ARCH ITT_ARCH_PPC64 # elif defined __loongarch__ # define ITT_ARCH ITT_ARCH_LOONGARCH64 +# elif defined __s390__ || defined __s390x__ +# define ITT_ARCH ITT_ARCH_S390X +# elif defined __hppa__ +# define ITT_ARCH ITT_ARCH_HPPA +# elif defined __riscv && __riscv_xlen == 64 +# define ITT_ARCH ITT_ARCH_RISCV64 # endif + #endif #ifdef __cplusplus @@ -211,7 +235,7 @@ #define API_VERSION_BUILD 20180723 #ifndef API_VERSION_NUM -#define API_VERSION_NUM 3.18.6 +#define API_VERSION_NUM 3.23.0 #endif /* API_VERSION_NUM */ #define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ @@ -255,13 +279,13 @@ typedef pthread_mutex_t mutex_t; #define __itt_mutex_init(mutex) InitializeCriticalSection(mutex) #define __itt_mutex_lock(mutex) EnterCriticalSection(mutex) #define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex) +#define __itt_mutex_destroy(mutex) DeleteCriticalSection(mutex) #define __itt_load_lib(name) LoadLibraryA(name) #define __itt_unload_lib(handle) FreeLibrary(handle) #define __itt_system_error() (int)GetLastError() #define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2) #define __itt_fstrnlen(s, l) strnlen_s(s, l) 
#define __itt_fstrcpyn(s1, b, s2, l) strncpy_s(s1, b, s2, l) -#define __itt_fstrdup(s) _strdup(s) #define __itt_thread_id() GetCurrentThreadId() #define __itt_thread_yield() SwitchToThread() #ifndef ITT_SIMPLE_INIT @@ -271,6 +295,13 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) { return InterlockedIncrement(ptr); } +ITT_INLINE long +__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long comperand) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long +__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long comperand) +{ + return InterlockedCompareExchange(ptr, exchange, comperand); +} #endif /* ITT_SIMPLE_INIT */ #define DL_SYMBOLS (1) @@ -300,6 +331,7 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) } #define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) #define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) +#define __itt_mutex_destroy(mutex) pthread_mutex_destroy(mutex) #define __itt_load_lib(name) dlopen(name, RTLD_LAZY) #define __itt_unload_lib(handle) dlclose(handle) #define __itt_system_error() errno @@ -326,14 +358,13 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) } #endif /* SDL_STRNCPY_S */ -#define __itt_fstrdup(s) strdup(s) #define __itt_thread_id() pthread_self() #define __itt_thread_yield() sched_yield() #if ITT_ARCH==ITT_ARCH_IA64 #ifdef __INTEL_COMPILER #define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) #else /* __INTEL_COMPILER */ -/* TODO: Add Support for not Intel compilers for IA-64 architecture */ +#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) #endif /* __INTEL_COMPILER */ #elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */ ITT_INLINE long @@ -357,6 +388,13 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) { return __TBB_machine_fetchadd4(ptr, 1) + 1L; } +ITT_INLINE long +__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long 
comperand) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long +__itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long comperand) +{ + return __sync_val_compare_and_swap(ptr, exchange, comperand); +} #endif /* ITT_SIMPLE_INIT */ void* dlopen(const char*, int) __attribute__((weak)); @@ -376,10 +414,20 @@ pthread_t pthread_self(void) __attribute__((weak)); #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -typedef enum { - __itt_collection_normal = 0, - __itt_collection_paused = 1 -} __itt_collection_state; +/* strdup() is not included into C99 which results in a compiler warning about + * implicitly declared symbol. To avoid the issue strdup is implemented + * manually. + */ +#define ITT_STRDUP_MAX_STRING_SIZE 4096 +#define __itt_fstrdup(s, new_s) do { \ + if (s != NULL) { \ + size_t s_len = __itt_fstrnlen(s, ITT_STRDUP_MAX_STRING_SIZE); \ + new_s = (char *)malloc(s_len + 1); \ + if (new_s != NULL) { \ + __itt_fstrcpyn(new_s, s_len + 1, s, s_len); \ + } \ + } \ +} while(0) typedef enum { __itt_thread_normal = 0, @@ -445,6 +493,9 @@ typedef struct __itt_counter_info struct ___itt_domain; struct ___itt_string_handle; +struct ___itt_histogram; + +#include "ittnotify.h" typedef struct ___itt_global { @@ -466,8 +517,9 @@ typedef struct ___itt_global struct ___itt_domain* domain_list; struct ___itt_string_handle* string_list; __itt_collection_state state; - __itt_counter_info_t* counter_list; + __itt_counter_info_t* counter_list; unsigned int ipt_collect_events; + struct ___itt_histogram* histogram_list; } __itt_global; #pragma pack(pop) @@ -493,7 +545,9 @@ typedef struct ___itt_global h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ if (h != NULL) { \ h->tid = t; \ - h->nameA = n ? 
__itt_fstrdup(n) : NULL; \ + char *n_copy = NULL; \ + __itt_fstrdup(n, n_copy); \ + h->nameA = n_copy; \ h->nameW = NULL; \ h->state = s; \ h->extra1 = 0; /* reserved */ \ @@ -526,7 +580,9 @@ typedef struct ___itt_global h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ if (h != NULL) { \ h->flags = 1; /* domain is enabled by default */ \ - h->nameA = name ? __itt_fstrdup(name) : NULL; \ + char *name_copy = NULL; \ + __itt_fstrdup(name, name_copy); \ + h->nameA = name_copy; \ h->nameW = NULL; \ h->extra1 = 0; /* reserved */ \ h->extra2 = NULL; /* reserved */ \ @@ -556,7 +612,9 @@ typedef struct ___itt_global #define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ if (h != NULL) { \ - h->strA = name ? __itt_fstrdup(name) : NULL; \ + char *name_copy = NULL; \ + __itt_fstrdup(name, name_copy); \ + h->strA = name_copy; \ h->strW = NULL; \ h->extra1 = 0; /* reserved */ \ h->extra2 = NULL; /* reserved */ \ @@ -588,9 +646,13 @@ typedef struct ___itt_global #define NEW_COUNTER_A(gptr,h,h_tail,name,domain,type) { \ h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ if (h != NULL) { \ - h->nameA = name ? __itt_fstrdup(name) : NULL; \ + char *name_copy = NULL; \ + __itt_fstrdup(name, name_copy); \ + h->nameA = name_copy; \ h->nameW = NULL; \ - h->domainA = domain ? __itt_fstrdup(domain) : NULL; \ + char *domain_copy = NULL; \ + __itt_fstrdup(domain, domain_copy); \ + h->domainA = domain_copy; \ h->domainW = NULL; \ h->type = type; \ h->index = 0; \ @@ -602,4 +664,40 @@ typedef struct ___itt_global } \ } +#define NEW_HISTOGRAM_W(gptr,h,h_tail,domain,name,x_type,y_type) { \ + h = (__itt_histogram*)malloc(sizeof(__itt_histogram)); \ + if (h != NULL) { \ + h->domain = domain; \ + h->nameA = NULL; \ + h->nameW = name ? 
_wcsdup(name) : NULL; \ + h->x_type = x_type; \ + h->y_type = y_type; \ + h->extra1 = 0; \ + h->extra2 = NULL; \ + if (h_tail == NULL) \ + (gptr)->histogram_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_HISTOGRAM_A(gptr,h,h_tail,domain,name,x_type,y_type) { \ + h = (__itt_histogram*)malloc(sizeof(__itt_histogram)); \ + if (h != NULL) { \ + h->domain = domain; \ + char *name_copy = NULL; \ + __itt_fstrdup(name, name_copy); \ + h->nameA = name_copy; \ + h->nameW = NULL; \ + h->x_type = x_type; \ + h->y_type = y_type; \ + h->extra1 = 0; \ + h->extra2 = NULL; \ + if (h_tail == NULL) \ + (gptr)->histogram_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + #endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/src/tbb/tools_api/ittnotify_static.c b/src/tbb/tools_api/ittnotify_static.c index 44dc8a027d..0b9aa492ac 100644 --- a/src/tbb/tools_api/ittnotify_static.c +++ b/src/tbb/tools_api/ittnotify_static.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,13 +14,14 @@ limitations under the License. 
*/ +#define INTEL_NO_MACRO_BODY +#define INTEL_ITTNOTIFY_API_PRIVATE #include "ittnotify_config.h" #if ITT_PLATFORM==ITT_PLATFORM_WIN -#ifdef PATH_MAX -#undef PATH_MAX -#endif +#if !defined(PATH_MAX) #define PATH_MAX 512 +#endif #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ #include #include @@ -31,8 +32,6 @@ #include #include -#define INTEL_NO_MACRO_BODY -#define INTEL_ITTNOTIFY_API_PRIVATE #include "ittnotify.h" #include "legacy/ittnotify.h" @@ -138,7 +137,7 @@ static const char* ittnotify_lib_name = "libittnotify.dylib"; { \ if (!p.mutex_initialized) \ { \ - if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ + if (__itt_interlocked_compare_exchange(&p.atomic_counter, 1, 0) == 0) \ { \ __itt_mutex_init(&p.mutex); \ p.mutex_initialized = 1; \ @@ -151,6 +150,20 @@ static const char* ittnotify_lib_name = "libittnotify.dylib"; } \ } +#define ITT_MUTEX_DESTROY(p) { \ + if (PTHREAD_SYMBOLS) \ + { \ + if (p.mutex_initialized) \ + { \ + if (__itt_interlocked_compare_exchange(&p.atomic_counter, 0, 1) == 1) \ + { \ + __itt_mutex_destroy(&p.mutex); \ + p.mutex_initialized = 0; \ + } \ + } \ + } \ +} + #define ITT_MODULE_OBJECT_VERSION 1 typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); @@ -242,9 +255,11 @@ static __itt_group_alias group_alias[] = { #pragma pack(pop) -#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if _MSC_VER #pragma warning(push) #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ +#endif #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ static __itt_api_info api_list[] = { @@ -265,8 +280,10 @@ static __itt_api_info api_list[] = { {NULL, NULL, NULL, NULL, __itt_group_none} }; -#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if _MSC_VER #pragma warning(pop) +#endif #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* static part descriptor which handles. all notification api attributes. 
*/ @@ -285,9 +302,10 @@ __itt_global _N_(_ittapi_global) = { NULL, /* thread_list */ NULL, /* domain_list */ NULL, /* string_list */ - __itt_collection_normal, /* collection state */ + __itt_collection_uninitialized, /* collection state */ NULL, /* counter_list */ - 0 /* ipt_collect_events */ + 0, /* ipt_collect_events */ + NULL /* histogram_list */ }; typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); @@ -300,9 +318,11 @@ static __itt_domain dummy_domain; ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); #endif /* ITT_NOTIFY_EXT_REPORT */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if _MSC_VER #pragma warning(push) #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ static void __itt_report_error(int code, ...) @@ -320,8 +340,12 @@ static void __itt_report_error(int code, ...) va_end(args); } -#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER +static int __itt_is_collector_available(void); + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if _MSC_VER #pragma warning(pop) +#endif #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -348,13 +372,16 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))( return &dummy_domain; } } - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && !wcscmp(h->nameW, name)) break; - } - if (h == NULL) + if (__itt_is_collector_available()) { - NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && !wcscmp(h->nameW, name)) break; + } + if (h == NULL) + { + NEW_DOMAIN_W(&_N_(_ittapi_global), h, h_tail, name); + } } if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; @@ -398,13 +425,16 @@ 
static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(c return &dummy_domain; } } - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; - } - if (h == NULL) + if (__itt_is_collector_available()) { - NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; + } + if (h == NULL) + { + NEW_DOMAIN_A(&_N_(_ittapi_global), h, h_tail, name); + } } if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; @@ -466,13 +496,16 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre return NULL; } } - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - { - if (h->strW != NULL && !wcscmp(h->strW, name)) break; - } - if (h == NULL) + if (__itt_is_collector_available()) { - NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + { + if (h->strW != NULL && !wcscmp(h->strW, name)) break; + } + if (h == NULL) + { + NEW_STRING_HANDLE_W(&_N_(_ittapi_global), h, h_tail, name); + } } __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; @@ -516,13 +549,16 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre return NULL; } } - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - { - if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break; - } - if (h == NULL) + if (__itt_is_collector_available()) { - NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + { + if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break; + } + if (h == NULL) 
+ { + NEW_STRING_HANDLE_A(&_N_(_ittapi_global), h, h_tail, name); + } } if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; @@ -553,15 +589,18 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init)) return NULL; } } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + if (__itt_is_collector_available()) { - if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global), h, h_tail, name, domain, type); + } } __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return (__itt_counter)h; @@ -606,14 +645,17 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))( return NULL; } } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && h->type == (int)type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; - } - if (h == NULL) + if (__itt_is_collector_available()) { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == (int)type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != 
NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global), h, h_tail, name, domain, type); + } } if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return (__itt_counter)h; @@ -643,15 +685,18 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_ return NULL; } } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + if (__itt_is_collector_available()) { - if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global), h, h_tail, name, domain, type); + } } __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return (__itt_counter)h; @@ -695,17 +740,114 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_i return NULL; } } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + if (__itt_is_collector_available()) + { + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == (int)type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global), h, h_tail, name, domain, type); + } + } + if (PTHREAD_SYMBOLS) 
__itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_histogram* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createW),_init))(const __itt_domain* domain, const wchar_t* name, __itt_metadata_type x_type, __itt_metadata_type y_type) +{ + __itt_histogram *h_tail = NULL, *h = NULL; + + if (domain == NULL || name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(histogram_createW) && ITTNOTIFY_NAME(histogram_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(histogram_createW)(domain, name, x_type, y_type); + } + else + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return NULL; + } + } + if (__itt_is_collector_available()) + { + for (h_tail = NULL, h = _N_(_ittapi_global).histogram_list; h != NULL; h_tail = h, h = h->next) + { + if (h->domain == NULL) continue; + else if (h->domain == domain && h->nameW != NULL && !wcscmp(h->nameW, name)) break; + } + if (h == NULL) + { + NEW_HISTOGRAM_W(&_N_(_ittapi_global), h, h_tail, domain, name, x_type, y_type); + } + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_histogram*)h; +} + +static __itt_histogram* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createA),_init))(const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_histogram* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_create),_init))(const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_histogram *h_tail = NULL, *h = NULL; + + if (domain == NULL || name == NULL) { - if (h->nameA != NULL && h->type == (int)type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && 
domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + return NULL; } - if (h == NULL) + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(histogram_createA) && ITTNOTIFY_NAME(histogram_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(histogram_createA)(domain, name, x_type, y_type); + } +#else + if (ITTNOTIFY_NAME(histogram_create) && ITTNOTIFY_NAME(histogram_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(histogram_create)(domain, name, x_type, y_type); + } +#endif + else + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#else + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif + return NULL; + } + } + if (__itt_is_collector_available()) + { + for (h_tail = NULL, h = _N_(_ittapi_global).histogram_list; h != NULL; h_tail = h, h = h->next) + { + if (h->domain == NULL) continue; + else if (h->domain == domain && h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; + } + if (h == NULL) + { + NEW_HISTOGRAM_A(&_N_(_ittapi_global), h, h_tail, domain, name, x_type, y_type); + } } if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; + return (__itt_histogram*)h; } /* -------------------------------------------------------------------------- */ @@ -720,10 +862,6 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) { ITTNOTIFY_NAME(pause)(); } - else - { - _N_(_ittapi_global).state = __itt_collection_paused; - } } static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) @@ -736,10 +874,6 @@ static void 
ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) { ITTNOTIFY_NAME(resume)(); } - else - { - _N_(_ittapi_global).state = __itt_collection_normal; - } } #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -1143,10 +1277,27 @@ static void __itt_nullify_all_pointers(void) *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER +static int __itt_is_collector_available(void) +{ + int is_available; + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).state == __itt_collection_uninitialized) + { + _N_(_ittapi_global).state = (NULL == __itt_get_lib_name()) ? __itt_collection_collector_absent : __itt_collection_collector_exists; + } + is_available = (_N_(_ittapi_global).state == __itt_collection_collector_exists || + _N_(_ittapi_global).state == __itt_collection_init_successful); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return is_available; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if _MSC_VER #pragma warning(push) #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ ITT_EXTERN_C void _N_(fini_ittlib)(void) @@ -1186,6 +1337,64 @@ ITT_EXTERN_C void _N_(fini_ittlib)(void) } } +/* !!! this function should be called under mutex lock !!! 
*/ +static void __itt_free_allocated_resources(void) +{ + __itt_string_handle* current_string = _N_(_ittapi_global).string_list; + while (current_string != NULL) + { + __itt_string_handle* tmp = current_string->next; + free((char*)current_string->strA); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + free((wchar_t*)current_string->strW); +#endif + free(current_string); + current_string = tmp; + } + _N_(_ittapi_global).string_list = NULL; + + __itt_domain* current_domain = _N_(_ittapi_global).domain_list; + while (current_domain != NULL) + { + __itt_domain* tmp = current_domain->next; + free((char*)current_domain->nameA); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + free((wchar_t*)current_domain->nameW); +#endif + free(current_domain); + current_domain = tmp; + } + _N_(_ittapi_global).domain_list = NULL; + + __itt_counter_info_t* current_couter = _N_(_ittapi_global).counter_list; + while (current_couter != NULL) + { + __itt_counter_info_t* tmp = current_couter->next; + free((char*)current_couter->nameA); + free((char*)current_couter->domainA); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + free((wchar_t*)current_couter->nameW); + free((wchar_t*)current_couter->domainW); +#endif + free(current_couter); + current_couter = tmp; + } + _N_(_ittapi_global).counter_list = NULL; + + __itt_histogram* current_histogram = _N_(_ittapi_global).histogram_list; + while (current_histogram != NULL) + { + __itt_histogram* tmp = current_histogram->next; + free((char*)current_histogram->nameA); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + free((wchar_t*)current_histogram->nameW); +#endif + free(current_histogram); + current_histogram = tmp; + } + _N_(_ittapi_global).histogram_list = NULL; +} + ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) { int i; @@ -1217,6 +1426,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou if (_N_(_ittapi_global).lib != NULL) { + _N_(_ittapi_global).state = __itt_collection_init_successful; __itt_api_init_t* 
__itt_api_init_ptr; int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); @@ -1277,6 +1487,8 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou } else { + _N_(_ittapi_global).state = __itt_collection_init_fail; + __itt_free_allocated_resources(); __itt_nullify_all_pointers(); __itt_report_error(__itt_error_no_module, lib_name, @@ -1290,6 +1502,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou } else { + _N_(_ittapi_global).state = __itt_collection_collector_absent; __itt_nullify_all_pointers(); } _N_(_ittapi_global).api_initialized = 1; @@ -1323,8 +1536,10 @@ ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t return prev; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if _MSC_VER #pragma warning(pop) +#endif #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /** __itt_mark_pt_region functions marks region of interest @@ -1367,3 +1582,24 @@ ITT_EXTERN_C void _N_(mark_pt_region_end)(__itt_pt_region region) (void)region; #endif } + +ITT_EXTERN_C __itt_collection_state (_N_(get_collection_state))(void) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + return _N_(_ittapi_global).state; +} + +/* !!! should be called from the library destructor !!! 
+ * this function destroys the mutex and frees resources + * allocated by ITT API static part + */ +ITT_EXTERN_C void (_N_(release_resources))(void) +{ + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + __itt_free_allocated_resources(); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + ITT_MUTEX_DESTROY(_N_(_ittapi_global)); +} diff --git a/src/tbb/tools_api/ittnotify_static.h b/src/tbb/tools_api/ittnotify_static.h index 0aab7c87f1..d59bfac1a2 100644 --- a/src/tbb/tools_api/ittnotify_static.h +++ b/src/tbb/tools_api/ittnotify_static.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -82,6 +82,14 @@ ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (IT ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args") + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_histogram*, histogram_createA, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type), (ITT_FORMAT domain, name, x_type, y_type), histogram_createA, __itt_group_structure, "%p, \"%s\", %d, %d") +ITT_STUB(ITTAPI, __itt_histogram*, histogram_createW, (const __itt_domain* domain, const wchar_t* name, __itt_metadata_type x_type, __itt_metadata_type y_type), (ITT_FORMAT domain, name, x_type, y_type), histogram_createW, __itt_group_structure, "%p, \"%s\", %d, %d") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_histogram*, histogram_create, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, 
__itt_metadata_type y_type), (ITT_FORMAT domain, name, x_type, y_type), histogram_create, __itt_group_structure, "%p, \"%s\", %d, %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + #endif /* __ITT_INTERNAL_BODY */ ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args") @@ -352,5 +360,6 @@ ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const ch #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ ITT_STUBV(ITTAPI, void, module_unload, (void *start_addr), (ITT_FORMAT start_addr), module_unload, __itt_group_module, "%p") +ITT_STUBV(ITTAPI, void, histogram_submit, (__itt_histogram* histogram, size_t length, void* x_data, void* y_data), (ITT_FORMAT histogram, length, x_data, y_data), histogram_submit, __itt_group_structure, "%p, %lu, %p, %p") #endif /* __ITT_INTERNAL_INIT */ diff --git a/src/tbb/tools_api/ittnotify_types.h b/src/tbb/tools_api/ittnotify_types.h index 7693c46f3d..1c0fded40b 100644 --- a/src/tbb/tools_api/ittnotify_types.h +++ b/src/tbb/tools_api/ittnotify_types.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/src/tbb/tools_api/legacy/ittnotify.h b/src/tbb/tools_api/legacy/ittnotify.h index 3d3561ecc4..1c40c28884 100644 --- a/src/tbb/tools_api/legacy/ittnotify.h +++ b/src/tbb/tools_api/legacy/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -126,7 +126,12 @@ #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ +#if defined(__MINGW32__) && !defined(__cplusplus) +#define ITT_INLINE static __inline__ __attribute__((__always_inline__,__gnu_inline__)) +#else #define ITT_INLINE static __forceinline +#endif /* __MINGW32__ */ + #define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* @@ -178,20 +183,20 @@ #define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) #define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) -#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) -#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_VOID_D0(n,d) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (d == NULL) ? (void)0 : (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (d == NULL) ? 
0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (d == NULL) ? 0 : (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) #ifdef ITT_STUB #undef ITT_STUB diff --git a/src/tbbbind/CMakeLists.txt b/src/tbbbind/CMakeLists.txt index 5ca1d7679b..3e85082767 100644 --- a/src/tbbbind/CMakeLists.txt +++ b/src/tbbbind/CMakeLists.txt @@ -66,8 +66,8 @@ function(tbbbind_build TBBBIND_NAME REQUIRED_HWLOC_TARGET) if (TBB_DEF_FILE_PREFIX) # If there's no prefix, assume we're using export directives set_target_properties(${TBBBIND_NAME} PROPERTIES - LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbbind.def - LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbbind.def + LINK_FLAGS "${TBB_LINK_DEF_FILE_FLAG}\"${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbbind.def\"" + LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbbind.def" ) endif() diff --git a/src/tbbbind/tbb_bind.rc b/src/tbbbind/tbb_bind.rc index 844ae1e782..41b78ee479 100644 --- a/src/tbbbind/tbb_bind.rc +++ b/src/tbbbind/tbb_bind.rc @@ -1,4 +1,4 @@ -// Copyright (c) 2005-2022 Intel Corporation +// Copyright (c) 2005-2023 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,29 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Microsoft Visual C++ generated resource script. 
-// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NO_MFC 1 -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif - -#define APSTUDIO_READONLY_SYMBOLS ///////////////////////////////////////////////////////////////////////////// // -// Generated from the TEXTINCLUDE 2 resource. +// Includes // #include #include "../../include/oneapi/tbb/version.h" -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - ///////////////////////////////////////////////////////////////////////////// // Neutral resources @@ -43,13 +27,6 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL #pragma code_page(1252) #endif //_WIN32 -///////////////////////////////////////////////////////////////////////////// -// manifest integration -#ifdef TBB_MANIFEST -#include "winuser.h" -2 RT_MANIFEST tbbmanifest.exe.manifest -#endif - ///////////////////////////////////////////////////////////////////////////// // // Version @@ -95,17 +72,3 @@ BEGIN VALUE "Translation", 0x0, 1200 END END - -//#endif // Neutral resources -///////////////////////////////////////////////////////////////////////////// - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. 
-// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED diff --git a/src/tbbmalloc/CMakeLists.txt b/src/tbbmalloc/CMakeLists.txt index 2a89286557..8c37a4f574 100644 --- a/src/tbbmalloc/CMakeLists.txt +++ b/src/tbbmalloc/CMakeLists.txt @@ -79,8 +79,8 @@ tbb_handle_ipo(tbbmalloc) if (TBB_DEF_FILE_PREFIX) # If there's no prefix, assume we're using export directives set_target_properties(tbbmalloc PROPERTIES - LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbmalloc.def - LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbmalloc.def + LINK_FLAGS "${TBB_LINK_DEF_FILE_FLAG}\"${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbmalloc.def\"" + LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbmalloc.def" ) endif() diff --git a/src/tbbmalloc/Customize.h b/src/tbbmalloc/Customize.h index bd62f26816..00341e887b 100644 --- a/src/tbbmalloc/Customize.h +++ b/src/tbbmalloc/Customize.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -37,12 +37,14 @@ #define MALLOC_ITT_SYNC_RELEASING(pointer) ITT_NOTIFY(sync_releasing, (pointer)) #define MALLOC_ITT_SYNC_CANCEL(pointer) ITT_NOTIFY(sync_cancel, (pointer)) #define MALLOC_ITT_FINI_ITTLIB() ITT_FINI_ITTLIB() +#define MALLOC_ITT_RELEASE_RESOURCES() ITT_RELEASE_RESOURCES() #else #define MALLOC_ITT_SYNC_PREPARE(pointer) ((void)0) #define MALLOC_ITT_SYNC_ACQUIRED(pointer) ((void)0) #define MALLOC_ITT_SYNC_RELEASING(pointer) ((void)0) #define MALLOC_ITT_SYNC_CANCEL(pointer) ((void)0) #define MALLOC_ITT_FINI_ITTLIB() ((void)0) +#define MALLOC_ITT_RELEASE_RESOURCES() ((void)0) #endif inline intptr_t BitScanRev(uintptr_t x) { diff --git a/src/tbbmalloc/frontend.cpp b/src/tbbmalloc/frontend.cpp index 3cb4bb924e..e32c240c09 100644 --- a/src/tbbmalloc/frontend.cpp +++ b/src/tbbmalloc/frontend.cpp @@ -2925,8 +2925,10 @@ extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dyi for( int i=1; i<=nThreads && i #include "../../include/oneapi/tbb/version.h" -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - ///////////////////////////////////////////////////////////////////////////// // Neutral resources @@ -43,13 +27,6 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL #pragma code_page(1252) #endif //_WIN32 -///////////////////////////////////////////////////////////////////////////// -// manifest integration -#ifdef TBB_MANIFEST -#include "winuser.h" -2 RT_MANIFEST tbbmanifest.exe.manifest -#endif - ///////////////////////////////////////////////////////////////////////////// // // Version @@ -95,17 +72,3 @@ BEGIN VALUE "Translation", 0x0, 1200 END END - -//#endif // Neutral resources -///////////////////////////////////////////////////////////////////////////// - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. 
-// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED diff --git a/src/tbbmalloc_proxy/tbbmalloc_proxy.rc b/src/tbbmalloc_proxy/tbbmalloc_proxy.rc index e84ddf0b91..71277e48ac 100644 --- a/src/tbbmalloc_proxy/tbbmalloc_proxy.rc +++ b/src/tbbmalloc_proxy/tbbmalloc_proxy.rc @@ -1,4 +1,4 @@ -// Copyright (c) 2005-2022 Intel Corporation +// Copyright (c) 2005-2023 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,29 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Microsoft Visual C++ generated resource script. -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NO_MFC 1 -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif - -#define APSTUDIO_READONLY_SYMBOLS ///////////////////////////////////////////////////////////////////////////// // -// Generated from the TEXTINCLUDE 2 resource. 
+// Includes // #include #include "../../include/oneapi/tbb/version.h" -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - ///////////////////////////////////////////////////////////////////////////// // Neutral resources @@ -43,13 +27,6 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL #pragma code_page(1252) #endif //_WIN32 -///////////////////////////////////////////////////////////////////////////// -// manifest integration -#ifdef TBB_MANIFEST -#include "winuser.h" -2 RT_MANIFEST tbbmanifest.exe.manifest -#endif - ///////////////////////////////////////////////////////////////////////////// // // Version @@ -95,17 +72,3 @@ BEGIN VALUE "Translation", 0x0, 1200 END END - -//#endif // Neutral resources -///////////////////////////////////////////////////////////////////////////// - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. 
-// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f15679e839..b789219fd7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -361,17 +361,29 @@ unset(_sde_find_name) # Common target for the tbbbind related tests add_custom_target(test_suite_arena_constraints) +# Check support for --no-as-needed linker option +if (MINGW OR NOT WIN32) + include(CheckCXXSourceCompiles) + set(CMAKE_REQUIRED_LIBRARIES "-Wl,--no-as-needed") + check_cxx_source_compiles("int main(int, char*[]) { return 0; }" LINKER_HAS_NO_AS_NEEDED) + unset(CMAKE_REQUIRED_LIBRARIES) +endif() + if (TARGET TBB::tbb) # Define the tests tbb_add_test(SUBDIR tbb NAME test_tick_count DEPENDENCIES TBB::tbb) tbb_add_test(SUBDIR tbb NAME test_allocators DEPENDENCIES TBB::tbb) tbb_add_test(SUBDIR tbb NAME test_arena_priorities DEPENDENCIES TBB::tbb) tbb_add_test(SUBDIR tbb NAME test_dynamic_link DEPENDENCIES TBB::tbb) - if (WIN32) - tbb_add_test(SUBDIR tbb NAME test_numa_dist DEPENDENCIES TBB::tbb) + if (LINKER_HAS_NO_AS_NEEDED) + # The linker may not detect a dependency on pthread in static variable constructors. 
+ target_link_libraries(test_dynamic_link PRIVATE "-Wl,--no-as-needed") endif() if (APPLE OR ANDROID_PLATFORM) - target_link_libraries(test_dynamic_link PRIVATE -rdynamic) # for the test_dynamic_link + target_link_libraries(test_dynamic_link PRIVATE -rdynamic) + endif() + if (WIN32) + tbb_add_test(SUBDIR tbb NAME test_numa_dist DEPENDENCIES TBB::tbb) endif() tbb_add_test(SUBDIR tbb NAME test_collaborative_call_once DEPENDENCIES TBB::tbb) tbb_add_test(SUBDIR tbb NAME test_concurrent_lru_cache DEPENDENCIES TBB::tbb) diff --git a/test/common/common_arena_constraints.h b/test/common/common_arena_constraints.h index 5844d396d7..2c84b2604a 100644 --- a/test/common/common_arena_constraints.h +++ b/test/common/common_arena_constraints.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2019-2021 Intel Corporation + Copyright (c) 2019-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,8 +17,6 @@ #ifndef __TBB_test_common_arena_constraints_H_ #define __TBB_test_common_arena_constraints_H_ -#define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1 - #if _WIN32 || _WIN64 #define _CRT_SECURE_NO_WARNINGS #endif diff --git a/test/common/concurrent_lru_cache_common.h b/test/common/concurrent_lru_cache_common.h index dccf5ef246..ef1dd3ac3a 100644 --- a/test/common/concurrent_lru_cache_common.h +++ b/test/common/concurrent_lru_cache_common.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include "test.h" +#include "utils.h" #include //----------------------------------------------------------------------------- @@ -114,10 +115,17 @@ namespace concurrent_lru_cache_helpers { } ~instance_counter() { - if (! --(*my_p_count)) + bool is_last = ! 
--(*my_p_count); +#if __GNUC__ == 12 + // GCC 12 warns about using my_p_count after delete. + // The test was investigated and no problems were detected + // The following statement silence the warning + static bool unused_is_last = is_last; + utils::suppress_unused_warning(unused_is_last); +#endif + if (is_last) delete(my_p_count); } - std::size_t instances_count() const { return *my_p_count; } }; diff --git a/test/common/doctest.h b/test/common/doctest.h index 27a5734a58..3b906764a6 100644 --- a/test/common/doctest.h +++ b/test/common/doctest.h @@ -1362,11 +1362,13 @@ DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wunused-comparison") , m_at(at) {} DOCTEST_NOINLINE operator Result() { +DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Waddress") // this is needed only for MSVC 2015: // https://ci.appveyor.com/project/onqtam/doctest/builds/38181202 DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(4800) // 'int': forcing value to bool bool res = static_cast(lhs); DOCTEST_MSVC_SUPPRESS_WARNING_POP +DOCTEST_GCC_SUPPRESS_WARNING_POP if(m_at & assertType::is_false) //!OCLINT bitwise operator in conditional res = !res; @@ -3692,6 +3694,7 @@ String toString(float in) { return fpToString(in, 5) + "f"; } String toString(double in) { return fpToString(in, 10); } String toString(double long in) { return fpToString(in, 15); } +DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") #define DOCTEST_TO_STRING_OVERLOAD(type, fmt) \ String toString(type in) { \ char buf[64]; \ @@ -3710,6 +3713,7 @@ DOCTEST_TO_STRING_OVERLOAD(int long, "%ld") DOCTEST_TO_STRING_OVERLOAD(int long unsigned, "%lu") DOCTEST_TO_STRING_OVERLOAD(int long long, "%lld") DOCTEST_TO_STRING_OVERLOAD(int long long unsigned, "%llu") +DOCTEST_CLANG_SUPPRESS_WARNING_POP String toString(std::nullptr_t) { return "NULL"; } diff --git a/test/common/parallel_for_each_common.h b/test/common/parallel_for_each_common.h index 3e680715c8..345d798019 100644 --- a/test/common/parallel_for_each_common.h +++ 
b/test/common/parallel_for_each_common.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,12 +36,12 @@ constexpr std::size_t depths_nubmer = 20; static std::atomic g_values_counter; -class value_t { +class value_t : public utils::NoAfterlife { size_t x; value_t& operator=(const value_t&); public: value_t(size_t xx) : x(xx) { ++g_values_counter; } - value_t(const value_t& v) : x(v.x) { ++g_values_counter; } + value_t(const value_t& v) : utils::NoAfterlife(v), x(v.x) { ++g_values_counter; } value_t(value_t&& v) : x(v.x) { ++g_values_counter; } ~value_t() { --g_values_counter; } size_t value() const volatile { return x; } diff --git a/test/common/utils.h b/test/common/utils.h index febf7e071a..5c4113deae 100644 --- a/test/common/utils.h +++ b/test/common/utils.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -32,6 +32,7 @@ #include #include #include +#include #if HARNESS_TBBMALLOC_THREAD_SHUTDOWN && __TBB_SOURCE_DIRECTLY_INCLUDED && (_WIN32 || _WIN64) #include "../../src/tbbmalloc/tbbmalloc_internal_api.h" @@ -437,6 +438,43 @@ concept well_formed_instantiation = requires { }; #endif // __TBB_CPP20_CONCEPTS_PRESENT +class LifeTrackableObject { + using set_type = std::unordered_set; + static set_type alive_objects; +public: + LifeTrackableObject() { + alive_objects.insert(this); + } + + LifeTrackableObject(const LifeTrackableObject&) { + alive_objects.insert(this); + } + + LifeTrackableObject(LifeTrackableObject&&) { + alive_objects.insert(this); + } + + LifeTrackableObject& operator=(const LifeTrackableObject&) = default; + LifeTrackableObject& operator=(LifeTrackableObject&&) = default; + + ~LifeTrackableObject() { + alive_objects.erase(this); + } + + static bool is_alive(const LifeTrackableObject& object) { + return is_alive(&object); + } + + static bool is_alive(const LifeTrackableObject* object) { + return alive_objects.find(object) != alive_objects.end(); + } + + static const set_type& set() { + return alive_objects; + } +}; +std::unordered_set LifeTrackableObject::alive_objects{}; + } // namespace utils #endif // __TBB_test_common_utils_H diff --git a/test/conformance/conformance_arena_constraints.cpp b/test/conformance/conformance_arena_constraints.cpp index 1f30d6ad59..de70fe8cc5 100644 --- a/test/conformance/conformance_arena_constraints.cpp +++ b/test/conformance/conformance_arena_constraints.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2019-2021 Intel Corporation + Copyright (c) 2019-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -57,6 +57,21 @@ TEST_CASE("Test NUMA topology traversal correctness") { REQUIRE_MESSAGE(numa_nodes_info.empty(), "Some available NUMA nodes indexes were not detected."); } +#if __HYBRID_CPUS_TESTING +//! 
Testing NUMA topology traversal correctness +//! \brief \ref interface \ref requirement +TEST_CASE("Test core types topology traversal correctness") { + system_info::initialize(); + std::vector core_types_info = system_info::get_cpu_kinds_info(); + std::vector core_types = tbb::info::core_types(); + + REQUIRE_MESSAGE(core_types_info.size() == core_types.size(), "Wrong core types number detected."); + for (unsigned i = 0; i < core_types.size(); ++i) { + REQUIRE_MESSAGE(core_types[i] == core_types_info[i].index, "Wrong core type index detected."); + } +} +#endif /*__HYBRID_CPUS_TESTING*/ + #else /*!__TBB_HWLOC_VALID_ENVIRONMENT*/ //! Testing NUMA support interfaces validity when HWLOC is not presented on system diff --git a/test/conformance/conformance_flowgraph.h b/test/conformance/conformance_flowgraph.h index 4eff6384fd..e3926a737c 100644 --- a/test/conformance/conformance_flowgraph.h +++ b/test/conformance/conformance_flowgraph.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -79,8 +79,9 @@ struct message { template typename std::enable_if::value, std::vector>::type get_values( test_push_receiver& rr ) { std::vector messages; - int val = 0; - for(V tmp(0); rr.try_get(tmp); ++val) { + V tmp(0); + + while (rr.try_get(tmp)) { messages.push_back(tmp); } return messages; @@ -89,8 +90,9 @@ typename std::enable_if::value, std::vector template typename std::enable_if::value, std::vector>::type get_values( test_push_receiver& rr ) { std::vector messages; - int val = 0; - for(V tmp; rr.try_get(tmp); ++val) { + V tmp; + + while (rr.try_get(tmp)) { messages.push_back(tmp); } return messages; diff --git a/test/conformance/conformance_join_node.cpp b/test/conformance/conformance_join_node.cpp index 153506e519..532c956593 100644 --- a/test/conformance/conformance_join_node.cpp +++ b/test/conformance/conformance_join_node.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,8 +28,8 @@ using my_input_tuple = std::tuple; std::vector get_values( conformance::test_push_receiver& rr ) { std::vector messages; - int val = 0; - for(my_input_tuple tmp(0, 0.f, input_msg(0)); rr.try_get(tmp); ++val) { + my_input_tuple tmp(0, 0.f, input_msg(0)); + while(rr.try_get(tmp)) { messages.push_back(tmp); } return messages; diff --git a/test/tbb/test_arena_constraints.cpp b/test/tbb/test_arena_constraints.cpp index 227c2679bc..9264b87056 100644 --- a/test/tbb/test_arena_constraints.cpp +++ b/test/tbb/test_arena_constraints.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2019-2021 Intel Corporation + Copyright (c) 2019-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,21 +22,6 @@ #include "tbb/parallel_for.h" #if __TBB_HWLOC_VALID_ENVIRONMENT -#if __HYBRID_CPUS_TESTING -//! 
Testing NUMA topology traversal correctness -//! \brief \ref interface \ref requirement -TEST_CASE("Test core types topology traversal correctness") { - system_info::initialize(); - std::vector core_types_info = system_info::get_cpu_kinds_info(); - std::vector core_types = tbb::info::core_types(); - - REQUIRE_MESSAGE(core_types_info.size() == core_types.size(), "Wrong core types number detected."); - for (unsigned i = 0; i < core_types.size(); ++i) { - REQUIRE_MESSAGE(core_types[i] == core_types_info[i].index, "Wrong core type index detected."); - } -} -#endif /*__HYBRID_CPUS_TESTING*/ - //! Test affinity and default_concurrency correctness for all available constraints. //! \brief \ref error_guessing TEST_CASE("Test affinity and default_concurrency correctness for all available constraints.") { diff --git a/test/tbb/test_collaborative_call_once.cpp b/test/tbb/test_collaborative_call_once.cpp index 2ffa283f55..d8ee09fda0 100644 --- a/test/tbb/test_collaborative_call_once.cpp +++ b/test/tbb/test_collaborative_call_once.cpp @@ -216,6 +216,9 @@ TEST_CASE("only calls once - stress test") { // that makes impossible to create more than ~500 threads. // Android has been added to decrease testing time. constexpr std::size_t N = tbb::detail::d0::max_nfs_size * 2; +#elif __TBB_USE_THREAD_SANITIZER + // Reduce execution time under Thread Sanitizer + constexpr std::size_t N = tbb::detail::d0::max_nfs_size + 64; #else constexpr std::size_t N = tbb::detail::d0::max_nfs_size * 4; #endif diff --git a/test/tbb/test_concurrent_lru_cache.cpp b/test/tbb/test_concurrent_lru_cache.cpp index eefd3fc1ac..00f0c8df06 100644 --- a/test/tbb/test_concurrent_lru_cache.cpp +++ b/test/tbb/test_concurrent_lru_cache.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #endif #include "common/test.h" +#include "common/utils.h" #include #include @@ -125,3 +126,28 @@ TEST_CASE("basic test for eviction of only unused items 2") { REQUIRE_MESSAGE(is_correct, "cache should not evict items in use"); } +//! \brief \ref error_guessing +TEST_CASE("basic test for handling case when number_of_lru_history_items is zero") { + auto foo = [] (int) { + return utils::LifeTrackableObject{}; + }; + using cache_type = tbb::concurrent_lru_cache; + cache_type cache{foo, 0}; + + for(int i = 0; i < 10; ++i) { + // Check that no history is stored when my_history_list_capacity is 0. + // In this case, when trying to fill the cache, the items will be deleted if reference was not taken. + const utils::LifeTrackableObject* obj_addr = &cache[1].value(); + REQUIRE_MESSAGE(utils::LifeTrackableObject::is_alive(obj_addr) == false, "when number_of_lru_history_items is zero, element must be erased after use"); + } + + cache_type::handle h = cache[1]; + const utils::LifeTrackableObject* obj_addr = &h.value(); + auto& object_set = utils::LifeTrackableObject::set(); + for(int i = 0; i < 10; ++i) { + // Verify that item will still be alive if there is a handle holding that item. + cache[1]; + REQUIRE_MESSAGE(utils::LifeTrackableObject::is_alive(obj_addr), "the object with the key=1 was destroyed but should not"); + REQUIRE_MESSAGE(object_set.size() == 1, "no other values should be added"); + } +} diff --git a/test/tbb/test_concurrent_queue_whitebox.cpp b/test/tbb/test_concurrent_queue_whitebox.cpp index 18da8def25..1ba1530e70 100644 --- a/test/tbb/test_concurrent_queue_whitebox.cpp +++ b/test/tbb/test_concurrent_queue_whitebox.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -51,7 +51,8 @@ class FloggerBody { value_type elem = value_type(thread_id); for (std::size_t i = 0; i < elem_num; ++i) { q.push(elem); - q.try_pop(elem); + bool res = q.try_pop(elem); + CHECK_FAST(res); } } @@ -83,20 +84,18 @@ void test_flogger_help( Q& q, std::size_t items_per_page ) { REQUIRE_MESSAGE(q.my_queue_representation->head_counter < hack_val, "Failed wraparound test"); } -template -void test_flogger() { - { - tbb::concurrent_queue q; - test_flogger_help(q, q.my_queue_representation->items_per_page); - } - { - tbb::concurrent_bounded_queue q; +//! \brief \ref error_guessing +TEST_CASE("Test CQ Wrapparound") { + for (int i = 0; i < 1000; ++i) { + tbb::concurrent_queue q; test_flogger_help(q, q.my_queue_representation->items_per_page); } } //! \brief \ref error_guessing -TEST_CASE("Test Wrapparound") { - test_flogger(); - // TODO: add test with unsigned char +TEST_CASE("Test CBQ Wrapparound") { + for (int i = 0; i < 1000; ++i) { + tbb::concurrent_bounded_queue q; + test_flogger_help(q, q.my_queue_representation->items_per_page); + } } diff --git a/test/tbb/test_eh_flow_graph.cpp b/test/tbb/test_eh_flow_graph.cpp index 4064c9f760..ab331551a3 100644 --- a/test/tbb/test_eh_flow_graph.cpp +++ b/test/tbb/test_eh_flow_graph.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -1909,7 +1909,7 @@ void test_indexer_node() { run_indexer_node_test, nonThrowing, isThrowing>(); g_Wakeup_Msg = "indexer_node(is,is): Missed wakeup or machine is overloaded?"; run_indexer_node_test, isThrowing, isThrowing>(); - g_Wakeup_Msg = g_Orig_Wakeup_Msg;; + g_Wakeup_Msg = g_Orig_Wakeup_Msg; } /////////////////////////////////////////////// diff --git a/test/tbb/test_eh_thread.cpp b/test/tbb/test_eh_thread.cpp index 51b97976fc..d5af9db6a0 100644 --- a/test/tbb/test_eh_thread.cpp +++ b/test/tbb/test_eh_thread.cpp @@ -54,15 +54,16 @@ void limitThreads(size_t limit) CHECK_MESSAGE(0 == ret, "setrlimit has returned an error"); } -static bool g_exception_caught = false; -static std::mutex m; -static std::condition_variable cv; -static std::atomic stop{ false }; +size_t getThreadLimit() { + rlimit rlim; + + int ret = getrlimit(RLIMIT_NPROC, &rlim); + CHECK_MESSAGE(0 == ret, "getrlimit has returned an error"); + return rlim.rlim_cur; +} static void* thread_routine(void*) { - std::unique_lock lock(m); - cv.wait(lock, [] { return stop == true; }); return nullptr; } @@ -94,32 +95,17 @@ TEST_CASE("Too many threads") { } // Some systems set really big limit (e.g. >45К) for the number of processes/threads - limitThreads(1024); - - std::thread /* isolate test */ ([] { - std::vector threads; - stop = false; - auto finalize = [&] { - stop = true; - cv.notify_all(); - for (auto& t : threads) { - t.join(); - } - }; - - for (int i = 0;; ++i) { + limitThreads(1); + if (getThreadLimit() == 1) { + for (int attempt = 0; attempt < 5; ++attempt) { Thread thread; - if (!thread.isValid()) { - break; - } - threads.push_back(thread); - if (i == 1024) { - WARN_MESSAGE(false, "setrlimit seems having no effect"); - finalize(); + if (thread.isValid()) { + WARN_MESSAGE(false, "We were able to create a thread. 
setrlimit seems having no effect"); + thread.join(); return; } } - g_exception_caught = false; + bool g_exception_caught = false; try { // Initialize the library to create worker threads tbb::parallel_for(0, 2, [](int) {}); @@ -132,9 +118,10 @@ TEST_CASE("Too many threads") { } // Do not CHECK to avoid memory allocation (we can be out of memory) if (!g_exception_caught) { - FAIL("No exception was caught"); + FAIL("No exception was thrown on library initialization"); } - finalize(); - }).join(); + } else { + WARN_MESSAGE(false, "setrlimit seems having no effect"); + } } #endif diff --git a/test/tbb/test_indexer_node.cpp b/test/tbb/test_indexer_node.cpp index c9f5bf3d03..4ce87e195a 100644 --- a/test/tbb/test_indexer_node.cpp +++ b/test/tbb/test_indexer_node.cpp @@ -280,7 +280,7 @@ class parallel_test { reset_outputCheck(SIZE, Count); for(int i=0; i < Count*SIZE; i++) { - CHECK_MESSAGE(outq2.try_get(v), "");; + CHECK_MESSAGE(outq2.try_get(v), ""); input_node_helper::check_value(v); } check_outputCheck(SIZE, Count); diff --git a/test/tbb/test_join_node.h b/test/tbb/test_join_node.h index d78d3e05a5..8969634e8a 100644 --- a/test/tbb/test_join_node.h +++ b/test/tbb/test_join_node.h @@ -1199,7 +1199,7 @@ class parallel_test { reset_outputCheck(TUPLE_SIZE, Count); for(int i = 0; i < Count; i++) { - CHECK_MESSAGE(outq2.try_get(v), "");; + CHECK_MESSAGE(outq2.try_get(v), ""); input_node_helper::check_value(i, v, not_out_of_order); } check_outputCheck(TUPLE_SIZE, Count); diff --git a/test/tbb/test_limiter_node.cpp b/test/tbb/test_limiter_node.cpp index 34e1db8f8f..7743a377db 100644 --- a/test/tbb/test_limiter_node.cpp +++ b/test/tbb/test_limiter_node.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -301,7 +301,7 @@ test_multifunction_to_limiter(int _max, int _nparallel) { emit_sum = 0; receive_count = 0; receive_sum = 0; - local_cnt = 0;; + local_cnt = 0; mf_node.try_put(1); g.wait_for_all(); CHECK_MESSAGE( (emit_count == receive_count), "counts do not match"); diff --git a/test/tbb/test_numa_dist.cpp b/test/tbb/test_numa_dist.cpp index 950e007f17..666e718283 100644 --- a/test/tbb/test_numa_dist.cpp +++ b/test/tbb/test_numa_dist.cpp @@ -16,6 +16,8 @@ #include "common/test.h" +#if !__TBB_WIN8UI_SUPPORT + #include #include "tbb/parallel_for.h" #include "tbb/global_control.h" @@ -156,3 +158,5 @@ TEST_CASE("Double threads") { #if _MSC_VER #pragma warning (pop) #endif + +#endif // !__TBB_WIN8UI_SUPPORT diff --git a/test/tbb/test_parallel_sort.cpp b/test/tbb/test_parallel_sort.cpp index e4e9451b83..595e0deebf 100644 --- a/test/tbb/test_parallel_sort.cpp +++ b/test/tbb/test_parallel_sort.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -99,13 +98,7 @@ void set(Minimal& minimal_ref, ValueType new_value) { template void set(std::string& string_ref, KeyType key) { - static char buffer[20]; -#if _MSC_VER && __STDC_SECURE_LIB__>=200411 - sprintf_s(buffer, sizeof(buffer), "%f", static_cast(key)); -#else - sprintf(buffer, "%f", static_cast(key)); -#endif - string_ref = buffer; + string_ref = std::to_string(static_cast(key)); } diff --git a/test/tbb/test_partitioner.cpp b/test/tbb/test_partitioner.cpp index e00eb5b665..b78fe208fa 100644 --- a/test/tbb/test_partitioner.cpp +++ b/test/tbb/test_partitioner.cpp @@ -41,7 +41,12 @@ template float test(PerBodyFunc&& body) { tbb::global_control concurrency(tbb::global_control::max_allowed_parallelism, num_threads); tbb::task_arena big_arena(static_cast(num_threads)); +#if __TBB_USE_THREAD_SANITIZER + // Reduce execution time under Thread Sanitizer + const std::size_t repeats = 50; +#else const std::size_t repeats = 100; +#endif const std::size_t per_thread_iters = 1000; using range = std::pair; diff --git a/test/tbbmalloc/test_malloc_overload.cpp b/test/tbbmalloc/test_malloc_overload.cpp index f27da9916c..7f8c3489e6 100644 --- a/test/tbbmalloc/test_malloc_overload.cpp +++ b/test/tbbmalloc/test_malloc_overload.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -449,10 +449,19 @@ TEST_CASE("Main set of tests") { CheckMemalignFuncOverload(aligned_alloc, free); #endif -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#if __INTEL_COMPILER + #pragma warning(push) + #pragma warning(disable: 1478) +#elif __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif struct mallinfo info = mallinfo(); -#pragma GCC diagnostic pop +#if __INTEL_COMPILER + #pragma warning(pop) +#elif __GNUC__ + #pragma GCC diagnostic pop +#endif // right now mallinfo initialized by zero REQUIRE((!info.arena && !info.ordblks && !info.smblks && !info.hblks && !info.hblkhd && !info.usmblks && !info.fsmblks diff --git a/third-party-programs.txt b/third-party-programs.txt index 40d6ce3b82..b555450a92 100644 --- a/third-party-programs.txt +++ b/third-party-programs.txt @@ -13,7 +13,7 @@ terms are listed below. _______________________________________________________________________________________________________ 1. Intel(R) Instrumentation and Tracing Technology (ITT) - Copyright (c) 2019 Intel Corporation. All rights reserved. + Copyright (c) 2022 Intel Corporation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -385,7 +385,7 @@ ________________________________________________________________________________ 5. Doctest - Copyright (c) 2016-2019 Viktor Kirilov + Copyright (c) 2016-2021 Viktor Kirilov The MIT License (MIT)