Skip to content

Commit

Permalink
Commit oneTBB source code 3b16143
Browse files Browse the repository at this point in the history
  • Loading branch information
tbbdev committed Mar 10, 2023
1 parent c949771 commit 3eb1ff7
Show file tree
Hide file tree
Showing 87 changed files with 1,121 additions and 623 deletions.
2 changes: 1 addition & 1 deletion .bazelversion
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.0.0
6.0.0
8 changes: 4 additions & 4 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ cc_library(
"include/oneapi/tbb/detail/*.h",
]),
copts = ["-w"] + select({
"@bazel_tools//platforms:windows": [""],
"@platforms//os:windows": [""],
"//conditions:default": ["-mwaitpkg"],
}),
defines =
Expand All @@ -47,16 +47,16 @@ cc_library(
],
}) +
select({
"@bazel_tools//platforms:osx": ["_XOPEN_SOURCE"],
"@platforms//os:osx": ["_XOPEN_SOURCE"],
"//conditions:default": [],
}),
includes = [
"include",
],
linkopts =
select({
"@bazel_tools//platforms:windows": [],
"@bazel_tools//platforms:linux": [
"@platforms//os:windows": [],
"@platforms//os:linux": [
"-ldl",
"-pthread",
"-lrt",
Expand Down
2 changes: 1 addition & 1 deletion Bazel.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ example

_WORKSPACE.bazel_:
```python
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")

git_repository(
name = "oneTBB",
Expand Down
47 changes: 31 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022 Intel Corporation
# Copyright (c) 2020-2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -216,19 +216,17 @@ else()
if (TBB_BUILD)
add_subdirectory(src/tbb)
endif()
if (NOT "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "mips")
if (TBBMALLOC_BUILD)
add_subdirectory(src/tbbmalloc)
if(TBBMALLOC_PROXY_BUILD AND NOT "${MSVC_CXX_ARCHITECTURE_ID}" MATCHES "ARM64")
add_subdirectory(src/tbbmalloc_proxy)
endif()
endif()
if (APPLE OR NOT BUILD_SHARED_LIBS)
message(STATUS "TBBBind build targets are disabled due to unsupported environment")
else()
add_subdirectory(src/tbbbind)
if (TBBMALLOC_BUILD)
add_subdirectory(src/tbbmalloc)
if(TBBMALLOC_PROXY_BUILD AND NOT "${MSVC_CXX_ARCHITECTURE_ID}" MATCHES "ARM64")
add_subdirectory(src/tbbmalloc_proxy)
endif()
endif()
if (APPLE OR NOT BUILD_SHARED_LIBS)
message(STATUS "TBBBind build targets are disabled due to unsupported environment")
else()
add_subdirectory(src/tbbbind)
endif()

# -------------------------------------------------------------------
# Installation instructions
Expand Down Expand Up @@ -279,10 +277,27 @@ endif()

if (ANDROID_PLATFORM)
if ("${ANDROID_STL}" STREQUAL "c++_shared")
configure_file(
"${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/${ANDROID_ABI}/libc++_shared.so"
"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libc++_shared.so"
COPYONLY)
if (${ANDROID_NDK_MAJOR} GREATER_EQUAL "25")
if(ANDROID_ABI STREQUAL "arm64-v8a")
set(ANDROID_TOOLCHAIN_NAME "aarch64-linux-android")
elseif(ANDROID_ABI STREQUAL "x86_64")
set(ANDROID_TOOLCHAIN_NAME "x86_64-linux-android")
elseif(ANDROID_ABI STREQUAL "armeabi-v7a")
set(ANDROID_TOOLCHAIN_NAME "arm-linux-androideabi")
elseif(ANDROID_ABI STREQUAL "x86")
set(ANDROID_TOOLCHAIN_NAME "i686-linux-android")
endif()

configure_file(
"${ANDROID_TOOLCHAIN_ROOT}/sysroot/usr/lib/${ANDROID_TOOLCHAIN_NAME}/libc++_shared.so"
"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libc++_shared.so"
COPYONLY)
else()
configure_file(
"${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/${ANDROID_ABI}/libc++_shared.so"
"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libc++_shared.so"
COPYONLY)
endif()
endif()
# This custom target may be implemented without separate CMake script, but it requires
# ADB(Android Debug Bridge) executable file availability, so to encapsulate this requirement
Expand Down
13 changes: 13 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ cmake <options> ..
cpack
```

## Installation from vcpkg

You can download and install oneTBB using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
```sh
git clone https://github.com/Microsoft/vcpkg.git
cd vcpkg
./bootstrap-vcpkg.sh #.\bootstrap-vcpkg.bat(for Windows)
./vcpkg integrate install
./vcpkg install tbb
```

The oneTBB port in vcpkg is kept up to date by Microsoft* team members and community contributors. If the version is out of date, create an issue or pull request on the [vcpkg repository](https://github.com/Microsoft/vcpkg).

## Example of Installation

### Single-configuration generators
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ Refer to oneTBB [examples](examples) and [samples](https://github.com/oneapi-src

oneTBB is a part of [oneAPI](https://oneapi.io). The current branch implements version 1.1 of oneAPI Specification.

> **_NOTE:_** Threading Building Blocks (TBB) is now called oneAPI Threading Building Blocks (oneTBB) to highlight that the tool is a part of the oneAPI ecosystem.
## Release Information
Here are [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQUIREMENTS.md).

Expand Down
23 changes: 6 additions & 17 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,25 @@
This document contains changes of oneTBB compared to the last release.

## Table of Contents <!-- omit in toc -->
- [New Features](#new_features)
- [Known Limitations](#known-limitations)
- [Fixed Issues](#fixed-issues)
- [Open-source Contributions Integrated](#open-source-contributions-integrated)

## :white_check_mark: New Features
- Improved support and use of the latest C++ standards for parallel_sort that allows using this algorithm with user-defined and standard library-defined objects with modern semantics.
- The following features are now fully functional: task_arena extensions, collaborative_call_once, adaptive mutexes, heterogeneous overloads for concurrent_hash_map, and task_scheduler_handle.
- Added support for Windows* Server 2022 and Python 3.10.

## :rotating_light: Known Limitations
- A static assert causes compilation failures in oneTBB headers when compiling with Clang* 12.0.0 or newer if using the LLVM* standard library with -ffreestanding and C++11/14 compiler options.
- An application using Parallel STL algorithms in libstdc++ versions 9 and 10 may fail to compile due to incompatible interface changes between earlier versions of Threading Building Blocks (TBB) and oneAPI Threading Building Blocks (oneTBB). Disable support for Parallel STL algorithms by defining PSTL_USE_PARALLEL_POLICIES (in libstdc++ 9) or _GLIBCXX_USE_TBB_PAR_BACKEND (in libstdc++ 10) macro to zero before inclusion of the first standard header file in each translation unit.
- On Linux* OS, if oneAPI Threading Building Blocks (oneTBB) or Threading Building Blocks (TBB) are installed in a system folder like /usr/lib64, the application may fail to link due to the order in which the linker searches for libraries. Use the -L linker option to specify the correct location of oneTBB library. This issue does not affect the program execution.
- The oneapi::tbb::info namespace interfaces might unexpectedly change the process affinity mask on Windows* OS systems (see https://github.com/open-mpi/hwloc/issues/366 for details) when using hwloc version lower than 2.5.
- Using a hwloc version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows OS. See https://github.com/open-mpi/hwloc/issues/477 for details.
- Using a hwloc version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows* OS. See https://github.com/open-mpi/hwloc/issues/477 for details.
- The NUMA topology may be detected incorrectly on Windows OS machines where the number of NUMA node threads exceeds the size of 1 processor group.
- On Windows OS on ARM64*, when compiling an application using oneTBB with the Microsoft* Compiler, the compiler issues a warning C4324 that a structure was padded due to the alignment specifier. Consider suppressing the warning by specifying /wd4324 to the compiler command line.
- oneTBB does not support fork(). To work around the issue, consider using task_scheduler_handle to join oneTBB worker threads before using fork().
- C++ exception handling mechanism on Windows* OS on ARM64* might corrupt memory if an exception is thrown from any oneTBB parallel algorithm (see Windows* OS on ARM64* compiler issue: https://developercommunity.visualstudio.com/t/ARM64-incorrect-stack-unwinding-for-alig/1544293).

## :hammer: Fixed Issues
- Memory allocator crash on a system with an incomplete /proc/meminfo (GitHub* [#584](https://github.com/oneapi-src/oneTBB/issues/584)).
- Incorrect blocking of task stealing (GitHub* [#478](https://github.com/oneapi-src/oneTBB/issues/478)).
- Hang due to incorrect decrement of a limiter_node (GitHub* [#634](https://github.com/oneapi-src/oneTBB/issues/634)).
- Memory corruption in some rare cases when passing big messages in a flow graph (GitHub* [#639](https://github.com/oneapi-src/oneTBB/issues/639)).
- Possible deadlock in a throwable flow graph node with a lightweight policy. The lightweight policy is now ignored for functors that can throw exceptions (GitHub* [#420](https://github.com/oneapi-src/oneTBB/issues/420)).
- Crash when obtaining a range from empty ordered and unordered containers (GitHub* [#641](https://github.com/oneapi-src/oneTBB/issues/641)).
- Deadlock in a concurrent_vector resize() that could happen when the new size is less than the previous size (GitHub* [#733](https://github.com/oneapi-src/oneTBB/issues/733)).
- Memory allocator crash when allocating ~1TB on 64-bit systems (GitHub* [#838](https://github.com/oneapi-src/oneTBB/issues/838)).
- Fixed thread distribution over NUMA nodes on Windows* OS systems.
- For oneapi::tbb::suspend, it is guaranteed that the user-specified callable object is executed by the calling thread.

## :octocat: Open-source Contributions Integrated
- Improved aligned memory allocation. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/pull/671).
- Optimized usage of atomic_fence on IA-32 and Intel(R) 64 architectures. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/pull/328).
- Fixed incorrect definition of the assignment operator in containers. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/issues/372).
- Fix for full LTO* build, library and tests, on UNIX* OS systems. Contributed by Vladislav Shchapov (https://github.com/oneapi-src/oneTBB/pull/798).
7 changes: 7 additions & 0 deletions SYSTEM_REQUIREMENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ This document provides details about hardware, operating system, and software pr
- [Supported Hardware](#supported-hardware)
- [Software](#software)
- [Supported Operating Systems](#supported-operating-systems)
- [Community-Supported Platforms](#community-supported-platforms)
- [Supported Compilers](#supported-compilers)


Expand Down Expand Up @@ -54,6 +55,12 @@ This document provides details about hardware, operating system, and software pr
- Systems with Android* operating systems
- Android* 9

### Community-Supported Platforms
- MinGW*
- FreeBSD*
- Microsoft* Windows* on ARM*/ARM64*
- macOS* on ARM64*

### Supported Compilers
- Intel* oneAPI DPC++/C++ Compiler
- Intel* C++ Compiler 19.0 and 19.1 version
Expand Down
5 changes: 4 additions & 1 deletion cmake/compilers/GNU.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022 Intel Corporation
# Copyright (c) 2020-2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,6 +68,9 @@ if (MINGW AND CMAKE_SYSTEM_PROCESSOR MATCHES "i.86")
list (APPEND TBB_COMMON_COMPILE_FLAGS -msse2)
endif ()

# GNU flags to prevent the compiler from optimizing out security checks
set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv)

# TBB malloc settings
set(TBBMALLOC_LIB_COMPILE_FLAGS -fno-rtti -fno-exceptions)
set(TBB_OPENMP_FLAG -fopenmp)
3 changes: 2 additions & 1 deletion cmake/sanitize.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021 Intel Corporation
# Copyright (c) 2020-2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,4 +39,5 @@ set(TBB_TESTS_ENVIRONMENT ${TBB_TESTS_ENVIRONMENT}
"LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_SOURCE_DIR}/cmake/suppressions/lsan.suppressions")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TBB_SANITIZE_OPTION}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${TBB_SANITIZE_OPTION}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${TBB_SANITIZE_OPTION}")
2 changes: 0 additions & 2 deletions doc/main/reference/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,5 @@ The key properties of a preview feature are:
scalable_memory_pools
helpers_for_expressing_graphs
concurrent_lru_cache_cls
constraints_extensions
info_namespace_extensions
task_group_extensions
custom_mutex_chmap
17 changes: 17 additions & 0 deletions doc/main/tbb_userguide/Flow-Graph-exception-tips.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
.. _Flow_Graph_exception_tips:

Flow Graph Tips for Exception Handling and Cancellation
=======================================================


The execution of a flow graph can be canceled directly or as a result of
an exception that propagates beyond a node's body. You can then
optionally reset the graph so that it can be re-executed.

.. toctree::
:maxdepth: 4

../tbb_userguide/catching_exceptions
../tbb_userguide/cancel_a_graph
../tbb_userguide/use_graph_reset
../tbb_userguide/cancelling_nested_parallelism
11 changes: 11 additions & 0 deletions doc/main/tbb_userguide/Flow-Graph-waiting-tips.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. _Flow_Graph_waiting_tips:

Flow Graph Tips for Waiting for and Destroying a Flow Graph
===========================================================

.. toctree::
:maxdepth: 4

../tbb_userguide/always_use_wait_for_all
../tbb_userguide/avoid_dynamic_node_removal
../tbb_userguide/destroy_graphs_outside_main_thread
5 changes: 3 additions & 2 deletions doc/main/tbb_userguide/Guiding_Task_Scheduler_Execution.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ assign a NUMA node identifier to the ``task_arena::constraints::numa_id`` field.

The processors with `Intel® Hybrid Technology <https://www.intel.com/content/www/us/en/products/docs/processors/core/core-processors-with-hybrid-technology-brief.html>`_
contain several core types, each suited for different purposes.
For example, some applications may improve their performance by preferring execution on the most performant cores.
To set execution preference, assign specific core type identifier to the ``task_arena::constraints::core_type`` field.
In most cases, systems with hybrid CPU architecture show reasonable performance without involving additional API calls.
However, in some exceptional scenarios, performance may be tuned by setting the preferred core type.
To set the preferred core type for the execution, assign a specific core type identifier to the ``task_arena::constraints::core_type`` field.

The example shows how to set the most performant core type as preferable for work execution:

Expand Down
2 changes: 1 addition & 1 deletion doc/main/tbb_userguide/Memory_Allocation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@ redirect the standard routines to these functions.

../tbb_userguide/Which_Dynamic_Libraries_to_Use
../tbb_userguide/Allocator_Configuration
../tbb_userguide/Automically_Replacing_malloc
../tbb_userguide/automatically-replacing-malloc
2 changes: 1 addition & 1 deletion doc/main/tbb_userguide/Migration_Guide/Task_API.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ is not guaranteed to be executed next by the current thread.
tbb::task_spawn(child);
root.wait_for_all();;
root.wait_for_all();
}
In oneTBB, this can be done using ``oneapi::tbb::task_group``.
Expand Down
14 changes: 7 additions & 7 deletions doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ the overhead of copying a ``TextSlice``.
oneapi::tbb::parallel_pipeline(
ntoken,
oneapi::tbb::make_filter<void,TextSlice*>(
oneapi::tbb::filter::serial_in_order, MyInputFunc(input_file) )
oneapi::tbb::filter_mode::serial_in_order, MyInputFunc(input_file) )
&
oneapi::tbb::make_filter<TextSlice*,TextSlice*>(
oneapi::tbb::filter::parallel, MyTransformFunc() )
oneapi::tbb::filter_mode::parallel, MyTransformFunc() )
&
oneapi::tbb::make_filter<TextSlice*,void>(
oneapi::tbb::filter::serial_in_order, MyOutputFunc(output_file) ) );
oneapi::tbb::filter_mode::serial_in_order, MyOutputFunc(output_file) ) );
}


Expand Down Expand Up @@ -172,13 +172,13 @@ equivalent version of the previous example that does this follows:


void RunPipeline( int ntoken, FILE* input_file, FILE* output_file ) {
oneapi::tbb::filter<void,TextSlice*> f1( oneapi::tbb::filter::serial_in_order,
oneapi::tbb::filter<void,TextSlice*> f1( oneapi::tbb::filter_mode::serial_in_order,
MyInputFunc(input_file) );
oneapi::tbb::filter<TextSlice*,TextSlice*> f2(oneapi::tbb::filter::parallel,
oneapi::tbb::filter<TextSlice*,TextSlice*> f2(oneapi::tbb::filter_mode::parallel,
MyTransformFunc() );
oneapi::tbb::filter<TextSlice*,void> f3(oneapi::tbb::filter::serial_in_order,
oneapi::tbb::filter<TextSlice*,void> f3(oneapi::tbb::filter_mode::serial_in_order,
MyOutputFunc(output_file) );
oneapi::tbb::filter<void,void> f = f1 & f2 & f3;
oneapi::tbb::filter<void,void> f = f1 & f2 & f3;
oneapi::tbb::parallel_pipeline(ntoken,f);
}

Expand Down
22 changes: 22 additions & 0 deletions doc/main/tbb_userguide/automatically-replacing-malloc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
.. _automatically-replacing-malloc:

Automatically Replacing ``malloc`` and Other C/C++ Functions for Dynamic Memory Allocation
==========================================================================================


On Windows\* and Linux\* operating systems, it is possible to automatically
replace all calls to standard functions for dynamic memory allocation
(such as ``malloc``) with the |full_name| scalable equivalents.
Doing so can sometimes improve application performance.


Replacements are provided by the proxy library (the library names can be
found in platform-specific sections below). A proxy library and a
scalable memory allocator library should be taken from the same release
of oneTBB, otherwise the libraries may be mutually incompatible.

.. toctree::
:maxdepth: 4

../tbb_userguide/Windows_C_Dynamic_Memory_Interface_Replacement
../tbb_userguide/Linux_C_Dynamic_Memory_Interface_Replacement
4 changes: 0 additions & 4 deletions doc/main/tbb_userguide/snippets/flow_graph_examples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@
/* Flow Graph Code Example for the Userguide.
*/

//! Enable extended task_arena constraints feature for supporting Intel Hybrid Technology
//! and Intel Hyper-Threading Technology.
#define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1

#include <oneapi/tbb/flow_graph.h>
#include <vector>

Expand Down
Loading

0 comments on commit 3eb1ff7

Please sign in to comment.