From 62b29550d1e7c2f96067b7934af0e372744a531c Mon Sep 17 00:00:00 2001 From: tbbdev Date: Thu, 15 Sep 2016 14:57:37 +0300 Subject: [PATCH] Committing Intel(R) TBB 4.4 Update 1 source code --- CHANGES | 50 ++++++ README.md | 4 +- build/android.inc | 2 + build/linux.gcc.inc | 2 +- build/windows.icl.inc | 3 + doc/Release_Notes.txt | 11 ++ doc/html/a00241.html | 18 +- doc/html/a00256.html | 34 ++-- examples/common/toolset.props | 2 +- examples/graph/index.html | 2 + examples/index.html | 2 +- .../fractal/Makefile | 16 +- .../fractal/Makefile.windows | 0 .../fractal/fractal.cpp | 116 ++++++++----- .../fractal/fractal.h | 69 ++++++-- .../fractal/fractal_video.h | 30 +++- .../fractal/index.html | 30 +++- .../fractal/main.cpp | 0 .../fractal/msvs/fractal.sln | 0 .../fractal/msvs/fractal.vcxproj | 0 .../fractal/msvs/gui.ico | Bin .../fractal/msvs/gui.rc | 0 .../fractal/msvs/resource.h | 0 .../fractal/msvs/small.ico | Bin .../xcode/fractal.xcodeproj/project.pbxproj | 0 .../xcschemes/tbbExample.xcscheme | 0 .../{task_priority => task_arena}/index.html | 0 include/tbb/concurrent_vector.h | 12 +- include/tbb/flow_graph.h | 2 +- include/tbb/internal/_flow_graph_node_impl.h | 2 +- include/tbb/machine/gcc_generic.h | 65 ++++++- include/tbb/parallel_for_each.h | 66 +++++-- include/tbb/task.h | 4 + include/tbb/tbb_config.h | 33 ++-- include/tbb/tbb_stddef.h | 2 +- index.src.html => index.html | 35 ++-- jni/Application.mk | 4 +- src/perf/time_hash_map_fill.cpp | 6 +- src/perf/time_parallel_for_each.cpp | 70 ++++++++ src/rml/test/test_rml_mixed.cpp | 2 +- src/tbb/arena.cpp | 158 ++++++++++------- src/tbb/arena.h | 16 +- src/tbb/cache_aligned_allocator.cpp | 24 +-- src/tbb/concurrent_vector.cpp | 25 ++- src/tbb/governor.cpp | 57 +++--- src/tbb/governor.h | 2 - src/tbb/market.cpp | 13 +- src/tbb/market.h | 2 +- src/tbb/scheduler.cpp | 12 +- src/tbb/scheduler.h | 7 +- src/tbb/task_group_context.cpp | 17 +- src/tbb/tbb_misc.cpp | 6 +- src/tbbmalloc/backend.cpp | 42 ++++- src/tbbmalloc/frontend.cpp | 35 ++-- src/tbbmalloc/proxy.cpp | 63 ++++--- src/tbbmalloc/tbbmalloc_internal.h | 59 +++++-- src/test/harness.h | 2 +- src/test/harness_allocator_overload.h | 39 +++++ src/test/harness_defs.h | 2 +- src/test/harness_iterator.h | 8 +- src/test/harness_tsx.h | 8 +- src/test/test_allocator.h | 32 ++++ src/test/test_atomic.cpp | 2 +- src/test/test_malloc_atexit.cpp | 10 +- src/test/test_malloc_overload.cpp | 33 ++-- src/test/test_malloc_pools.cpp | 137 ++++++++------- src/test/test_task_arena.cpp | 162 ++++++++++++++++-- src/test/test_task_group.cpp | 13 ++ src/test/test_task_priority.cpp | 19 ++ src/test/test_tbb_fork.cpp | 28 ++- src/test/test_tbb_version.cpp | 2 +- 71 files changed, 1259 insertions(+), 470 deletions(-) rename examples/{task_priority => task_arena}/fractal/Makefile (76%) rename examples/{task_priority => task_arena}/fractal/Makefile.windows (100%) rename examples/{task_priority => task_arena}/fractal/fractal.cpp (74%) rename examples/{task_priority => task_arena}/fractal/fractal.h (61%) rename examples/{task_priority => task_arena}/fractal/fractal_video.h (71%) rename examples/{task_priority => task_arena}/fractal/index.html (77%) rename examples/{task_priority => task_arena}/fractal/main.cpp (100%) rename examples/{task_priority => task_arena}/fractal/msvs/fractal.sln (100%) rename examples/{task_priority => task_arena}/fractal/msvs/fractal.vcxproj (100%) rename examples/{task_priority => task_arena}/fractal/msvs/gui.ico (100%) rename examples/{task_priority => task_arena}/fractal/msvs/gui.rc (100%) 
rename examples/{task_priority => task_arena}/fractal/msvs/resource.h (100%) rename examples/{task_priority => task_arena}/fractal/msvs/small.ico (100%) rename examples/{task_priority => task_arena}/fractal/xcode/fractal.xcodeproj/project.pbxproj (100%) rename examples/{task_priority => task_arena}/fractal/xcode/fractal.xcodeproj/xcshareddata/xcschemes/tbbExample.xcscheme (100%) rename examples/{task_priority => task_arena}/index.html (100%) rename index.src.html => index.html (71%) create mode 100644 src/perf/time_parallel_for_each.cpp create mode 100644 src/test/harness_allocator_overload.h diff --git a/CHANGES b/CHANGES index f538787744..942f7df5e3 100644 --- a/CHANGES +++ b/CHANGES @@ -2,9 +2,49 @@ The list of most significant changes made over time in Intel(R) Threading Building Blocks (Intel(R) TBB). +Intel TBB 4.4 Update 1 +TBB_INTERFACE_VERSION == 9001 + +Changes (w.r.t. Intel TBB 4.4): + +- Added support for Microsoft* Visual Studio* 2015. +- Intel TBB no longer performs dynamic replacement of memory allocation + functions for Microsoft Visual Studio 2005 and earlier versions. +- For GCC 4.7 and higher, the intrinsics-based platform isolation layer + uses __atomic_* built-ins instead of the legacy __sync_* ones. + This change is inspired by a contribution from Mathieu Malaterre. +- Improvements in task_arena: + Several application threads may join a task_arena and execute tasks + simultaneously. The amount of concurrency reserved for application + threads at task_arena construction can be set to any value between + 0 and the arena concurrency limit. +- The fractal example was modified to demonstrate class task_arena + and moved to examples/task_arena/fractal. + +Bugs fixed: + +- Fixed a deadlock during destruction of task_scheduler_init objects + when one of destructors is set to wait for worker threads. +- Added a workaround for a possible crash on OS X* when dynamic memory + allocator replacement (libtbbmalloc_proxy) is used and memory is + released during application startup. +- Usage of mutable functors with task_group::run_and_wait() and + task_arena::enqueue() is disabled. An attempt to pass a functor + which operator()() is not const will produce compilation errors. +- Makefiles and environment scripts now properly recognize GCC 5.0 and + higher. + +Open-source contributions integrated: + +- Improved performance of parallel_for_each for inputs allowing random + access, by Raf Schietekat. + +------------------------------------------------------------------------ Intel TBB 4.4 TBB_INTERFACE_VERSION == 9000 +Changes (w.r.t. Intel TBB 4.3 Update 6): + - The following features are now fully supported: tbb::flow::composite_node; additional policies of tbb::flow::graph_node::reset(). @@ -60,6 +100,7 @@ Intel TBB 4.3 Update 6 TBB_INTERFACE_VERSION == 8006 Changes (w.r.t. Intel TBB 4.3 Update 5): + - Supported zero-copy realloc for objects >1MB under Linux* via mremap system call. - C++11 move-aware insert and emplace methods have been added to @@ -67,6 +108,7 @@ Changes (w.r.t. Intel TBB 4.3 Update 5): - install_name is set to @rpath/ on OS X*. Preview Features: + - Added template class async_node to the flow graph API. It allows a flow graph to communicate with an external activity managed by the user or another runtime. @@ -75,6 +117,7 @@ Preview Features: - extract() method of graph nodes now takes no arguments. Bugs fixed: + - concurrent_unordered_{set,map} behaves correctly for degenerate hashes. 
- Fixed a race condition in the memory allocator that may lead to @@ -85,9 +128,11 @@ Intel TBB 4.3 Update 5 TBB_INTERFACE_VERSION == 8005 Changes (w.r.t. Intel TBB 4.3 Update 4): + - Added add_ref_count() method of class tbb::task. Preview Features: + - Added class global_control for application-wide control of allowed parallelism and thread stack size. - memory_pool_allocator now throws the std::bad_alloc exception on @@ -96,6 +141,7 @@ Preview Features: std::bad_alloc to std::invalid_argument and std::runtime_error. Bugs fixed: + - scalable_allocator now throws the std::bad_alloc exception on allocation failure. - Fixed a race condition in the memory allocator that may lead to @@ -104,6 +150,7 @@ Bugs fixed: might be unable to modify the number of worker threads. Open-source contributions integrated: + - (Added but not enabled) push_front() method of class tbb::task_list by Raf Schietekat. @@ -112,6 +159,7 @@ Intel TBB 4.3 Update 4 TBB_INTERFACE_VERSION == 8004 Changes (w.r.t. Intel TBB 4.3 Update 3): + - Added a C++11 variadic constructor for enumerable_thread_specific. The arguments from this constructor are used to construct thread-local values. @@ -123,6 +171,7 @@ Changes (w.r.t. Intel TBB 4.3 Update 3): concurrent unordered containers. Preview Features: + - Interface-breaking change: typedefs changed for node predecessor and successor lists, affecting copy_predecessors and copy_successors methods. @@ -132,6 +181,7 @@ Preview Features: automatically using the node port with index 0 for an edge. Open-source contributions integrated: + - Draft code for enumerable_thread_specific constructor with multiple arguments (see above) by Adrien Guinet. - Fix for GCC invocation on IBM* Blue Gene* diff --git a/README.md b/README.md index 8bce84b8bb..19088d19c4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Intel(R) Threading Building Blocks 4.4 +# Intel(R) Threading Building Blocks 4.4 Update 1 Intel(R) Threading Building Blocks (Intel(R) TBB) lets you easily write parallel C++ programs that take full advantage of multicore performance, that are portable, composable and have future-proof scalability. @@ -8,7 +8,7 @@ Here are the latest [Changes] (CHANGES) and [Release Notes] (doc/Release_Notes.txt) (contains system requirements and known issues). ## Licensing -Intel(R) TBB 4.4 is licensed under [GPLv2] (COPYING) with the runtime exception. +Intel(R) TBB 4.4 Update 1 is licensed under [GPLv2] (COPYING) with the runtime exception. 
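The "mutable functors" change listed in CHANGES above means that a functor passed to task_group::run_and_wait() or task_arena::enqueue() now needs a const operator(); a non-const operator() produces a compilation error. A minimal sketch of a conforming functor (the names here are illustrative, not taken from the patch):

```cpp
#include "tbb/task_group.h"

// Hypothetical functor, used only for illustration.
struct Work {
    // operator() must be const: with this update, passing a functor whose
    // operator() is not const to task_group::run_and_wait() or
    // task_arena::enqueue() no longer compiles
    // (see the function_task change in include/tbb/task.h below).
    void operator()() const { /* do the work */ }
};

int main() {
    tbb::task_group g;
    Work w;
    g.run_and_wait( w );   // accepted: Work::operator() is const
    return 0;
}
```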
## Documentation * Intel(R) TBB [tutorial] (https://software.intel.com/en-us/tbb-tutorial) diff --git a/build/android.inc b/build/android.inc index fbe68a7642..061e308bde 100644 --- a/build/android.inc +++ b/build/android.inc @@ -33,6 +33,8 @@ ifneq ("command line","$(origin arch)") export COMPILER_VERSION := ICC: $(shell icc -V &1 | grep 'Version') ifneq (,$(findstring IA-32, $(COMPILER_VERSION))) export arch:=ia32 + else ifneq (,$(findstring Intel(R) 64, $(COMPILER_VERSION))) + export arch:=intel64 else $(error "No support for Android in $(COMPILER_VERSION)") endif diff --git a/build/linux.gcc.inc b/build/linux.gcc.inc index 4b7122bd62..9d93cfc179 100644 --- a/build/linux.gcc.inc +++ b/build/linux.gcc.inc @@ -49,7 +49,7 @@ ifneq (,$(shell gcc -dumpversion | egrep "^(4\.[2-9]|[5-9])")) endif # gcc 4.8 and later support RTM intrinsics, but require command line switch to enable them -ifneq (,$(shell gcc -dumpversion | egrep "^4\.[8-9]")) +ifneq (,$(shell gcc -dumpversion | egrep "^(4\.[8-9]|[5-9])")) RTM_KEY = -mrtm endif diff --git a/build/windows.icl.inc b/build/windows.icl.inc index d5047d4ed1..687516860e 100644 --- a/build/windows.icl.inc +++ b/build/windows.icl.inc @@ -126,6 +126,9 @@ endif ifeq ($(VCCOMPAT_FLAG),) VCCOMPAT_FLAG := $(if $(findstring vc12, $(VCVERSION)),/Qvc12) endif +ifeq ($(VCCOMPAT_FLAG),) + VCCOMPAT_FLAG := $(if $(findstring vc14, $(VCVERSION)),/Qvc14) +endif ifeq ($(VCCOMPAT_FLAG),) $(error VC version not detected correctly: $(VCVERSION) ) endif diff --git a/doc/Release_Notes.txt b/doc/Release_Notes.txt index 661de4d90f..cb795750a3 100644 --- a/doc/Release_Notes.txt +++ b/doc/Release_Notes.txt @@ -106,6 +106,11 @@ Library Issues of Microsoft* Visual C++ 10.0 runtime (msvcp100d.dll) in order to run. + - For applications linked with the debug version of Microsoft* + Universal CRT (ucrtbased.dll, used since Microsoft Visual C++ + 14.0) dynamic replacement of memory management functions + is not supported. + - If an application uses static MSVCRT libraries or the Intel TBB library built with static MSVCRT (vc_mt variant), and throws an exception from a functor passed to task_group::run_and_wait(), @@ -157,6 +162,12 @@ Library Issues 4.8.2, and 4.9.2), the destructor of a task_group might not throw missing_wait exception. + - On OS X* 10.11 some examples might fail to run via makefiles in + case System Integrity Protection is enabled. In such case + instead of `make ` use the following command: + `run_cmd="DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH" make ` + or run executables directly. + ------------------------------------------------------------------------ Copyright (C) 2005-2015 Intel Corporation. All Rights Reserved. diff --git a/doc/html/a00241.html b/doc/html/a00241.html index 5af0b4cc89..9d9328168f 100644 --- a/doc/html/a00241.html +++ b/doc/html/a00241.html @@ -401,10 +401,10 @@  Parallel iteration over range with affinity_partitioner and user-supplied context. More...
 
parallel_for_each
-template<typename InputIterator , typename Function > -void parallel_for_each (InputIterator first, InputIterator last, const Function &f, task_group_context &context) - Calls function f for all items from [first, last) interval using user-supplied context. More...
-  +template<typename Iterator , typename Function > +void parallel_for_each (Iterator first, Iterator last, const Function &f, task_group_context &context) + Calls function f for all items from [first, last) interval using user-supplied context. More...
+  template<typename Range , typename Function > void parallel_for_each (Range &rng, const Function &f, task_group_context &context)  Calls function f for all items from rng using user-supplied context. More...
@@ -413,11 +413,11 @@ void parallel_for_each (const Range &rng, const Function &f, task_group_context &context)  Calls function f for all items from const rng user-supplied context. More...
  - -template<typename InputIterator , typename Function > -void parallel_for_each (InputIterator first, InputIterator last, const Function &f) - Uses default context.
-  + +template<typename Iterator , typename Function > +void parallel_for_each (Iterator first, Iterator last, const Function &f) + Uses default context.
template<typename Range , typename Function > void parallel_for_each (Range &rng, const Function &f) diff --git a/doc/html/a00256.html b/doc/html/a00256.html index 17f8841ef2..d248af6b83 100644 --- a/doc/html/a00256.html +++ b/doc/html/a00256.html @@ -131,10 +131,10 @@ - - - - + + + + @@ -143,11 +143,11 @@ - - - - + + + + @@ -558,8 +558,6 @@

Function Documentation

Parallel iteration over a range, with optional addition of more work.

-

Referenced by tbb::parallel_for_each().

- @@ -904,22 +902,22 @@

Function Documentation

- +
-template<typename InputIterator , typename Function >
+template<typename Iterator , typename Function >

parallel_for_each

template<typename InputIterator , typename Function >
void tbb::parallel_for_each (InputIterator first, InputIterator last, const Function &f, task_group_context &context)
 Calls function f for all items from [first, last) interval using user-supplied context. More...
 
template<typename Iterator , typename Function >
void tbb::parallel_for_each (Iterator first, Iterator last, const Function &f, task_group_context &context)
 Calls function f for all items from [first, last) interval using user-supplied context. More...
 
template<typename Range , typename Function >
void tbb::parallel_for_each (Range &rng, const Function &f, task_group_context &context)
 Calls function f for all items from rng using user-supplied context. More...
void tbb::parallel_for_each (const Range &rng, const Function &f, task_group_context &context)
 Calls function f for all items from const rng user-supplied context. More...
 
-template<typename InputIterator , typename Function >
void tbb::parallel_for_each (InputIterator first, InputIterator last, const Function &f)
 Uses default context.
 
+template<typename Iterator , typename Function >
void tbb::parallel_for_each (Iterator first, Iterator last, const Function &f)
 Uses default context.
 
template<typename Range , typename Function >
void tbb::parallel_for_each (Range &rng, const Function &f)
- + - + @@ -944,8 +942,6 @@

Function Documentation

Calls function f for all items from [first, last) interval using user-supplied context.

-

References tbb::parallel_do().

-

Referenced by tbb::parallel_for_each().

@@ -984,7 +980,7 @@

Function Documentation

Calls function f for all items from rng using user-supplied context.

-

References tbb::parallel_for_each().

+

References tbb::parallel_for_each().

@@ -1022,7 +1018,7 @@

Function Documentation

Calls function f for all items from const rng user-supplied context.

-

References tbb::parallel_for_each().

+

References tbb::parallel_for_each().
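A minimal usage sketch of the parallel_for_each overloads documented above, assuming C++11 lambdas are available (the containers and values are illustrative): with this update, iterators are dispatched by category, so random-access inputs go through a blocked_range/parallel_for based implementation (see include/tbb/parallel_for_each.h below), while other iterator types keep the original parallel_do path.

```cpp
#include <vector>
#include <list>
#include "tbb/parallel_for_each.h"

int main() {
    // Random-access iterators: handled by the new parallel_for based implementation.
    std::vector<float> vec( 10000, 1.0f );
    tbb::parallel_for_each( vec.begin(), vec.end(), []( float& x ) { x += 1.0f; } );

    // Non-random-access iterators: still handled via parallel_do.
    std::list<float> lst( 1000, 1.0f );
    tbb::parallel_for_each( lst.begin(), lst.end(), []( float& x ) { x += 1.0f; } );
    return 0;
}
```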

diff --git a/examples/common/toolset.props b/examples/common/toolset.props index a99e238c52..1c2c2fb99a 100644 --- a/examples/common/toolset.props +++ b/examples/common/toolset.props @@ -1,9 +1,9 @@  - Intel C++ Compiler XE 14.0 Intel C++ Compiler 15.0 [Intel(R) System Studio] Intel C++ Compiler XE 15.0 + Intel C++ Compiler 16.0 true diff --git a/examples/graph/index.html b/examples/graph/index.html index df37e757ad..71cc09aeac 100644 --- a/examples/graph/index.html +++ b/examples/graph/index.html @@ -14,6 +14,8 @@

Directories

A simplistic example of a collection of digital logic gates that can be easily composed into larger circuits.
som
A simple example of a Kohonen Self-Organizing Map using cancellation. +
fgbzip2 +
A parallel implementation of bzip2 block-sorting file compressor.
diff --git a/examples/index.html b/examples/index.html index 8eeb038bb1..b947bcd138 100644 --- a/examples/index.html +++ b/examples/index.html @@ -26,7 +26,7 @@

Directories

Examples using raw task interface.
task_group
Examples using task_group interface. -
task_priority +
task_arena
Examples using the task priority feature.
test_all
Examples that test all the parts of the package. diff --git a/examples/task_priority/fractal/Makefile b/examples/task_arena/fractal/Makefile similarity index 76% rename from examples/task_priority/fractal/Makefile rename to examples/task_arena/fractal/Makefile index 953dae54f9..b4fdf48a54 100644 --- a/examples/task_priority/fractal/Makefile +++ b/examples/task_arena/fractal/Makefile @@ -41,10 +41,12 @@ all: release test resources: ifeq ($(UI),mac) - mkdir -p $(APPRES)/en.lproj $(NAME).app/Contents/MacOS - cp ../../common/gui/xcode/tbbExample/Info.plist $(NAME).app/Contents + mkdir -p $(NAME).app/Contents/{MacOS,Resources/en.lproj} + cat ../../common/gui/xcode/tbbExample/Info.plist | sed -e "s/tbbExample/$(NAME)/" > $(NAME).app/Contents/Info.plist + cat ../../common/gui/xcode/tbbExample/launcher.sh | sed -e "s/tbbExample/$(NAME)/" > $(NAME).app/Contents/MacOS/launcher.sh + chmod +x $(NAME).app/Contents/MacOS/launcher.sh cp ../../common/gui/xcode/tbbExample/PkgInfo $(NAME).app/Contents - cp ../../common/gui/xcode/tbbExample/en.lproj/* $(APPRES)/en.lproj + cp ../../common/gui/xcode/tbbExample/en.lproj/* $(NAME).app/Contents/Resources/en.lproj endif # OS X* release: $(SOURCES) resources @@ -52,12 +54,20 @@ ifeq ($(UI),mac) $(CXX_UI) -O2 -DNDEBUG $(CXXFLAGS) -c $(MACUISOURCES) endif # OS X* $(CXX) -O2 -DNDEBUG $(CXXFLAGS) -o $(EXE) $(SOURCES) $(MACUIOBJS) -ltbb $(LIBS) +ifeq ($(UI),mac) + cp ../../../build/*_release/libtbb.dylib $(NAME).app/Contents/Resources + install_name_tool -change libtbb.dylib @executable_path/../Resources/libtbb.dylib $(EXE) +endif debug: resources ifeq ($(UI),mac) $(CXX_UI) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -c $(MACUISOURCES) endif # OS X* $(CXX) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -o $(EXE) $(SOURCES) $(MACUIOBJS) -ltbb_debug $(LIBS) +ifeq ($(UI),mac) + cp ../../../build/*_debug/libtbb_debug.dylib $(NAME).app/Contents/Resources + install_name_tool -change libtbb_debug.dylib @executable_path/../Resources/libtbb_debug.dylib $(EXE) +endif clean: $(RM) $(EXE) *.o *.d diff --git a/examples/task_priority/fractal/Makefile.windows b/examples/task_arena/fractal/Makefile.windows similarity index 100% rename from examples/task_priority/fractal/Makefile.windows rename to examples/task_arena/fractal/Makefile.windows diff --git a/examples/task_priority/fractal/fractal.cpp b/examples/task_arena/fractal/fractal.cpp similarity index 74% rename from examples/task_priority/fractal/fractal.cpp rename to examples/task_arena/fractal/fractal.cpp index 3dc2d20c1b..03f02987ec 100644 --- a/examples/task_priority/fractal/fractal.cpp +++ b/examples/task_arena/fractal/fractal.cpp @@ -23,19 +23,23 @@ #include "tbb/parallel_for.h" #include "tbb/blocked_range2d.h" #include "tbb/task_scheduler_init.h" +#include "tbb/task_arena.h" +#include "tbb/task_group.h" #include "tbb/tick_count.h" -#include "tbb/compat/thread" #include #include +// Included for __TBB_CPP11_LAMBDAS_PRESENT definition +#include "tbb/tbb_config.h" + video *v; extern bool silent; extern bool schedule_auto; extern int grain_size; -color_t fractal::calc_one_pixel(int x0, int y0) { - int iter; +color_t fractal::calc_one_pixel( int x0, int y0 ) const { + unsigned int iter; double fx0, fy0, xtemp, x, y, mu; color_t color; @@ -46,11 +50,13 @@ color_t fractal::calc_one_pixel(int x0, int y0) { fy0 = fy0 / magn + cy; iter = 0; x = 0; y = 0; + mu = 0; - while (((x*x + y*y) <= 4) && (iter < max_iterations)) { + while (((x*x + y*y) <= 4) && (iter < max_iterations)) { xtemp = x*x - y*y + fx0; y = 2*x*y + fy0; x = xtemp; + mu += 
exp(-sqrt(x*x+y*y)); iter++; } @@ -60,18 +66,6 @@ color_t fractal::calc_one_pixel(int x0, int y0) { return color; } - // compute again but with exponent calculation at each iteration - // it's all for coloring point outside the mandelbrot set - iter = 0; x = 0; y = 0; - mu = 0; - while (((x*x + y*y) <= 4) && (iter < max_iterations)) { - xtemp = x*x - y*y + fx0; - y = 2*x*y + fy0; - x = xtemp; - mu += exp(-sqrt(x*x+y*y)); - iter++; - } - int b = (int)(256*mu); int g = (b/8); int r = (g/16); @@ -85,7 +79,7 @@ color_t fractal::calc_one_pixel(int x0, int y0) { } void fractal::clear() { - drawing_area area( off_x, off_y, size_x, size_y, dm) ; + drawing_area area( off_x, off_y, size_x, size_y, dm ) ; // fill the rendering area with black color for (int y=0; y(0, size_y, grain_size, 0, size_x, grain_size ), - fractal_body(*this), tbb::auto_partitioner(), context); + body, tbb::auto_partitioner(), context); else tbb::parallel_for( tbb::blocked_range2d(0, size_y, grain_size, 0, size_x, grain_size ), - fractal_body(*this), tbb::simple_partitioner(), context); + body, tbb::simple_partitioner(), context); } void fractal::run( tbb::task_group_context &context ) { clear(); + context.reset(); render( context ); } -bool fractal::check_point( int x, int y ) { - return x >= off_x && x <= off_x+size_x && +bool fractal::check_point( int x, int y ) const { + return x >= off_x && x <= off_x+size_x && y >= off_y && y <= off_y+size_y; } @@ -177,17 +177,10 @@ void fractal_group::calc_fractal( int num ) { } } -void fg_thread_func(fractal_group *fg) { - // initialize the task scheduler for the second thread - tbb::task_scheduler_init init( fg->get_num_threads() ); - // calculate the second fractal - fg->calc_fractal( 1 ); -} - void fractal_group::set_priorities() { // set the high priority for the active area and the normal priority for another area context[active].set_priority( tbb::priority_high ); - context[active^1].set_priority( tbb::priority_normal ); + context[active^1].set_priority( tbb::priority_low ); } void fractal_group::switch_priorities( int new_active ) { @@ -197,11 +190,39 @@ void fractal_group::switch_priorities( int new_active ) { draw_borders(); } -void fractal_group::set_num_frames_at_least(int n) { +void fractal_group::set_num_frames_at_least( int n ) { if ( num_frames[0]join(); - delete fg_thread; + arena.execute( [&] { gr.wait(); } ); +#else + arena.execute( arena_body_wait( gr ) ); +#endif } delete[] context; @@ -233,7 +269,7 @@ void fractal_group::draw_borders() { f1.draw_border( active==1 ); } -fractal_group::fractal_group( const drawing_memory &_dm, int _num_threads, int _max_iterations, int _num_frames ) : f0(_dm), f1(_dm), num_threads(_num_threads) { +fractal_group::fractal_group( const drawing_memory &_dm, int _num_threads, unsigned int _max_iterations, int _num_frames ) : f0(_dm), f1(_dm), num_threads(_num_threads) { // set rendering areas f0.size_x = f1.size_x = _dm.sizex/2-4; f0.size_y = f1.size_y = _dm.sizey-4; @@ -251,7 +287,7 @@ fractal_group::fractal_group( const drawing_memory &_dm, int _num_threads, int _ num_frames[0] = num_frames[1] = _num_frames; } -void fractal_group::mouse_click(int x, int y) { +void fractal_group::mouse_click( int x, int y ) { // assumption that the point is not inside any fractal area int new_active = -1; diff --git a/examples/task_priority/fractal/fractal.h b/examples/task_arena/fractal/fractal.h similarity index 61% rename from examples/task_priority/fractal/fractal.h rename to examples/task_arena/fractal/fractal.h index 92b3d82e8c..12f178eaf3 100644 
--- a/examples/task_priority/fractal/fractal.h +++ b/examples/task_arena/fractal/fractal.h @@ -37,13 +37,14 @@ class fractal { //! Fractal properties float cx, cy; float magn; - int max_iterations; + float step; + unsigned int max_iterations; //! Drawing memory object for rendering const drawing_memory &dm; //! One pixel calculation routine - color_t calc_one_pixel(int x, int y); + color_t calc_one_pixel( int x, int y ) const; //! Clears the fractal area void clear(); //! Draws the border around the fractal area @@ -51,11 +52,11 @@ class fractal { //! Renders the fractal void render( tbb::task_group_context &context ); //! Check if the point is inside the fractal area - bool check_point( int x, int y); + bool check_point( int x, int y ) const; public: //! Constructor - fractal( const drawing_memory &dm ) : dm(dm) { + fractal( const drawing_memory &dm ) : step(0.2), dm(dm) { #if _MSC_VER && _WIN64 && !__INTEL_COMPILER // Workaround for MSVC x64 compiler issue volatile int i=0; @@ -64,18 +65,29 @@ class fractal { //! Runs the fractal calculation void run( tbb::task_group_context &context ); //! Renders the fractal rectangular area - void render_rect(int x0, int y0, int x1, int y1); + void render_rect( int x0, int y0, int x1, int y1 ) const; + + void move_up() { cy += step; } + void move_down() { cy -= step; } + void move_left() { cx += step; } + void move_right(){ cx -= step; } + + void zoom_in() { magn *= 2.; step /= 2.; } + void zoom_out(){ magn /= 2.; step *= 2.; } + + void quality_inc() { max_iterations += max_iterations/2; } + void quality_dec() { max_iterations -= max_iterations/2; } friend class fractal_group; }; //! The group of fractals class fractal_group { - //! Fractals defenition + //! Fractals definition fractal f0, f1; //! Number of frames to calculate tbb::atomic num_frames[2]; - //! Task group contexts to manage prioroties + //! Task group contexts to manage priorities tbb::task_group_context *context; //! Border type enumeration @@ -96,19 +108,56 @@ class fractal_group { public: //! Constructor - fractal_group( const drawing_memory &_dm, int num_threads = tbb::task_scheduler_init::automatic, int max_iterations = 100000, int num_frames = 1); + fractal_group( const drawing_memory &_dm, + int num_threads = tbb::task_scheduler_init::automatic, + unsigned int max_iterations = 100000, int num_frames = 1 ); //! Run calculation void run( bool create_second_fractal=true ); //! Mouse event handler - void mouse_click(int x, int y); + void mouse_click( int x, int y ); //! Fractal calculation routine void calc_fractal( int num ); //! Get number of threads int get_num_threads() const { return num_threads; } //! Reset the number of frames to be not less than the given value - void set_num_frames_at_least(int n); + void set_num_frames_at_least( int n ); //! Switches the priorities of two fractals void switch_priorities( int new_active=-1 ); + //! Get active fractal + fractal& get_active_fractal() { return active ? 
f1 : f0; } + + void active_fractal_zoom_in() { + get_active_fractal().zoom_in(); + context[active].cancel_group_execution(); + } + void active_fractal_zoom_out() { + get_active_fractal().zoom_out(); + context[active].cancel_group_execution(); + } + void active_fractal_quality_inc() { + get_active_fractal().quality_inc(); + context[active].cancel_group_execution(); + } + void active_fractal_quality_dec() { + get_active_fractal().quality_dec(); + context[active].cancel_group_execution(); + } + void active_fractal_move_up() { + get_active_fractal().move_up(); + context[active].cancel_group_execution(); + } + void active_fractal_move_down() { + get_active_fractal().move_down(); + context[active].cancel_group_execution(); + } + void active_fractal_move_left() { + get_active_fractal().move_left(); + context[active].cancel_group_execution(); + } + void active_fractal_move_right() { + get_active_fractal().move_right(); + context[active].cancel_group_execution(); + } }; #endif /* FRACTAL_H_ */ diff --git a/examples/task_priority/fractal/fractal_video.h b/examples/task_arena/fractal/fractal_video.h similarity index 71% rename from examples/task_priority/fractal/fractal_video.h rename to examples/task_arena/fractal/fractal_video.h index 02eda48714..d56d03fc7d 100644 --- a/examples/task_priority/fractal/fractal_video.h +++ b/examples/task_arena/fractal/fractal_video.h @@ -43,13 +43,31 @@ class fractal_video : public video void on_key( int key ) { switch ( key&0xff ) { - case 27: - running = false; break; - case ' ': // space - if( fg ) fg->switch_priorities(); - default: - if( fg ) fg->set_num_frames_at_least(20); + case esc_key: + running = false; break; + case ' ': // space + if( fg ) fg->switch_priorities(); break; + + case 'q': + if( fg ) fg->active_fractal_zoom_in(); break; + case 'e': + if( fg ) fg->active_fractal_zoom_out(); break; + + case 'r': + if( fg ) fg->active_fractal_quality_inc(); break; + case 'f': + if( fg ) fg->active_fractal_quality_dec(); break; + + case 'w': + if( fg ) fg->active_fractal_move_up(); break; + case 'a': + if( fg ) fg->active_fractal_move_left(); break; + case 's': + if( fg ) fg->active_fractal_move_down(); break; + case 'd': + if( fg ) fg->active_fractal_move_right(); break; } + if( fg ) fg->set_num_frames_at_least(20); } void on_process() { diff --git a/examples/task_priority/fractal/index.html b/examples/task_arena/fractal/index.html similarity index 77% rename from examples/task_priority/fractal/index.html rename to examples/task_arena/fractal/index.html index 2c3583dd29..da80481141 100644 --- a/examples/task_priority/fractal/index.html +++ b/examples/task_arena/fractal/index.html @@ -3,7 +3,7 @@

Overview

The example calculates two classical Mandelbrot fractals with different priorities. -The application window is divided into two areas where fractals are rendered. With mouse click on an area the user can change the priority of the calculating fractal. In the clicked area the fractal priority is changed to be "high" and the priority of the other fractal is changed to "normal". +The application window is divided into two areas where fractals are rendered. With mouse click on an area the user can change the priority of the calculating fractal. In the clicked area the fractal priority is changed to be "high" and the priority of the other fractal is changed to "low". The fractal with "high" priority we will call active. The example also has the console mode but in this mode the priorities could not be changed during execution. @@ -52,6 +52,34 @@

Usage


Run it with a small fractal iterations number and the desired number of threads, e.g., fractal 4 1 10000. +

Hot keys

+The following hot keys can be used in interactive execution mode when the example is compiled with the graphical +user interface: +
+
<left mouse button> +
Make the fractal active and change its priority to high +
<space> +
Switch priorities +
<w> +
Move the active fractal up +
<a> +
Move the active fractal to the left +
<s> +
Move the active fractal down +
<d> +
Move the active fractal to the right +
<q> +
Zoom in the active fractal +
<e> +
Zoom out the active fractal +
<r> +
Increase quality (count of iterations for each pixel) of the active fractal +
<f> +
Decrease quality (count of iterations for each pixel) of the active fractal +
<esc> +
Stop execution. +
+
Up to parent directory
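Per the CHANGES entry, the fractal example above was modified to demonstrate class task_arena: each fractal runs in its own arena with an explicit concurrency limit, and the calling thread joins an arena through execute() to wait on a task_group, as in the fractal.cpp changes earlier in this patch. A minimal sketch of that pattern, assuming C++11 lambdas (the concurrency values are illustrative):

```cpp
#include "tbb/task_arena.h"
#include "tbb/task_group.h"

int main() {
    // An arena limited to 4 threads, with 1 slot reserved for application threads.
    tbb::task_arena arena( 4, 1 );
    tbb::task_group group;

    // Submit work into the arena.
    arena.execute( [&]{ group.run( []{ /* compute one frame */ } ); } );

    // The calling thread joins the arena and helps execute its tasks until the
    // task_group is done (the same pattern as in the fractal.cpp changes above).
    arena.execute( [&]{ group.wait(); } );
    return 0;
}
```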

diff --git a/examples/task_priority/fractal/main.cpp b/examples/task_arena/fractal/main.cpp similarity index 100% rename from examples/task_priority/fractal/main.cpp rename to examples/task_arena/fractal/main.cpp diff --git a/examples/task_priority/fractal/msvs/fractal.sln b/examples/task_arena/fractal/msvs/fractal.sln similarity index 100% rename from examples/task_priority/fractal/msvs/fractal.sln rename to examples/task_arena/fractal/msvs/fractal.sln diff --git a/examples/task_priority/fractal/msvs/fractal.vcxproj b/examples/task_arena/fractal/msvs/fractal.vcxproj similarity index 100% rename from examples/task_priority/fractal/msvs/fractal.vcxproj rename to examples/task_arena/fractal/msvs/fractal.vcxproj diff --git a/examples/task_priority/fractal/msvs/gui.ico b/examples/task_arena/fractal/msvs/gui.ico similarity index 100% rename from examples/task_priority/fractal/msvs/gui.ico rename to examples/task_arena/fractal/msvs/gui.ico diff --git a/examples/task_priority/fractal/msvs/gui.rc b/examples/task_arena/fractal/msvs/gui.rc similarity index 100% rename from examples/task_priority/fractal/msvs/gui.rc rename to examples/task_arena/fractal/msvs/gui.rc diff --git a/examples/task_priority/fractal/msvs/resource.h b/examples/task_arena/fractal/msvs/resource.h similarity index 100% rename from examples/task_priority/fractal/msvs/resource.h rename to examples/task_arena/fractal/msvs/resource.h diff --git a/examples/task_priority/fractal/msvs/small.ico b/examples/task_arena/fractal/msvs/small.ico similarity index 100% rename from examples/task_priority/fractal/msvs/small.ico rename to examples/task_arena/fractal/msvs/small.ico diff --git a/examples/task_priority/fractal/xcode/fractal.xcodeproj/project.pbxproj b/examples/task_arena/fractal/xcode/fractal.xcodeproj/project.pbxproj similarity index 100% rename from examples/task_priority/fractal/xcode/fractal.xcodeproj/project.pbxproj rename to examples/task_arena/fractal/xcode/fractal.xcodeproj/project.pbxproj diff --git a/examples/task_priority/fractal/xcode/fractal.xcodeproj/xcshareddata/xcschemes/tbbExample.xcscheme b/examples/task_arena/fractal/xcode/fractal.xcodeproj/xcshareddata/xcschemes/tbbExample.xcscheme similarity index 100% rename from examples/task_priority/fractal/xcode/fractal.xcodeproj/xcshareddata/xcschemes/tbbExample.xcscheme rename to examples/task_arena/fractal/xcode/fractal.xcodeproj/xcshareddata/xcschemes/tbbExample.xcscheme diff --git a/examples/task_priority/index.html b/examples/task_arena/index.html similarity index 100% rename from examples/task_priority/index.html rename to examples/task_arena/index.html diff --git a/include/tbb/concurrent_vector.h b/include/tbb/concurrent_vector.h index 484de99e75..80a57f12de 100644 --- a/include/tbb/concurrent_vector.h +++ b/include/tbb/concurrent_vector.h @@ -127,6 +127,12 @@ namespace internal { T* pointer() const { return static_cast(const_cast(array)); } }; + friend void enforce_segment_allocated(segment_value_t const& s, internal::exception_id exception = eid_bad_last_alloc){ + if(s != segment_allocated()){ + internal::throw_exception(exception); + } + } + // Segment pointer. 
class segment_t { atomic array; @@ -1153,8 +1159,9 @@ class concurrent_vector: protected internal::allocator_base, pointer internal_push_back_result(){ return g.element;} iterator return_iterator_and_dismiss(){ + pointer ptr = g.element; g.dismiss(); - return iterator(v, k, g.element); + return iterator(v, k, ptr); } }; }; @@ -1235,8 +1242,7 @@ T& concurrent_vector::internal_subscript_with_exceptions( size_type index //TODO: why not make a load of my_segment relaxed as well ? //TODO: add an assertion that my_segment[k] is properly aligned to please ITT segment_value_t segment_value = my_segment[k].template load(); - if( segment_value != segment_allocated() ) // check for correct segment pointer - internal::throw_exception(internal::eid_index_range_error); // throw std::range_error + enforce_segment_allocated(segment_value, internal::eid_index_range_error); return (segment_value.pointer())[j]; } diff --git a/include/tbb/flow_graph.h b/include/tbb/flow_graph.h index 77f86b7230..f3466388a9 100644 --- a/include/tbb/flow_graph.h +++ b/include/tbb/flow_graph.h @@ -3528,7 +3528,7 @@ class composite_node , tbb::flow::tuple > : p }; // class composite_node -#endif // __TBB_PREVIEW_COMPOSITE_NODE +#endif // __TBB_FLOW_GRAPH_CPP11_FEATURES #if __TBB_PREVIEW_ASYNC_NODE namespace internal { diff --git a/include/tbb/internal/_flow_graph_node_impl.h b/include/tbb/internal/_flow_graph_node_impl.h index a18855d5a4..f0efc907e9 100644 --- a/include/tbb/internal/_flow_graph_node_impl.h +++ b/include/tbb/internal/_flow_graph_node_impl.h @@ -851,7 +851,7 @@ namespace internal { }; // multifunction_output //composite_node -#if TBB_PREVIEW_FLOW_GRAPH_TRACE +#if TBB_PREVIEW_FLOW_GRAPH_TRACE && __TBB_FLOW_GRAPH_CPP11_FEATURES template void add_nodes_impl(CompositeType*, bool) {} diff --git a/include/tbb/machine/gcc_generic.h b/include/tbb/machine/gcc_generic.h index 257af96574..53b528176b 100644 --- a/include/tbb/machine/gcc_generic.h +++ b/include/tbb/machine/gcc_generic.h @@ -50,6 +50,9 @@ #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT #endif +#if __TBB_GCC_VERSION < 40700 +// Use __sync_* builtins + /** As this generic implementation has absolutely no information about underlying hardware, its performance most likely will be sub-optimal because of full memory fence usages where a more lightweight synchronization means (or none at all) @@ -64,10 +67,37 @@ inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \ return __sync_val_compare_and_swap(reinterpret_cast(ptr),comparand,value); \ } \ - \ inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \ return __sync_fetch_and_add(reinterpret_cast(ptr),value); \ +} + +#define __TBB_USE_GENERIC_FETCH_STORE 1 + +#else +// __TBB_GCC_VERSION >= 40700; use __atomic_* builtins available since gcc 4.7 + +#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") +// Acquire and release fence intrinsics in GCC might miss compiler fence. +// Adding it at both sides of an intrinsic, as we do not know what reordering can be made. 
+#define __TBB_acquire_consistency_helper() __TBB_compiler_fence(); __atomic_thread_fence(__ATOMIC_ACQUIRE); __TBB_compiler_fence() +#define __TBB_release_consistency_helper() __TBB_compiler_fence(); __atomic_thread_fence(__ATOMIC_RELEASE); __TBB_compiler_fence() +#define __TBB_full_memory_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define __TBB_control_consistency_helper() __TBB_acquire_consistency_helper() + +#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \ +inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \ + (void)__atomic_compare_exchange_n(reinterpret_cast(ptr), &comparand, value, \ + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ + return comparand; \ } \ +inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \ + return __atomic_fetch_add(reinterpret_cast(ptr), value, __ATOMIC_SEQ_CST); \ +} \ +inline T __TBB_machine_fetchstore##S( volatile void *ptr, T value ) { \ + return __atomic_exchange_n(reinterpret_cast(ptr), value, __ATOMIC_SEQ_CST); \ +} + +#endif // __TBB_GCC_VERSION < 40700 __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t) __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t) @@ -86,6 +116,13 @@ static inline intptr_t __TBB_machine_lg( uintptr_t x ) { return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) -1 ; } + +typedef unsigned char __TBB_Flag; +typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag; + +#if __TBB_GCC_VERSION < 40700 +// Use __sync_* builtins + static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) { __sync_fetch_and_or(reinterpret_cast(ptr),addend); } @@ -94,19 +131,35 @@ static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) { __sync_fetch_and_and(reinterpret_cast(ptr),addend); } +inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) { + return __sync_lock_test_and_set(&flag,1)==0; +} -typedef unsigned char __TBB_Flag; +inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) { + __sync_lock_release(&flag); +} -typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag; +#else +// __TBB_GCC_VERSION >= 40700; use __atomic_* builtins available since gcc 4.7 + +static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) { + __atomic_fetch_or(reinterpret_cast(ptr),addend,__ATOMIC_SEQ_CST); +} + +static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) { + __atomic_fetch_and(reinterpret_cast(ptr),addend,__ATOMIC_SEQ_CST); +} inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) { - return __sync_lock_test_and_set(&flag,1)==0; + return !__atomic_test_and_set(&flag,__ATOMIC_ACQUIRE); } inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) { - __sync_lock_release(&flag); + __atomic_clear(&flag,__ATOMIC_RELEASE); } +#endif // __TBB_GCC_VERSION < 40700 + // Machine specific atomic operations #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) @@ -117,7 +170,7 @@ inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) { // Definition of other functions #define __TBB_Log2(V) __TBB_machine_lg(V) -#define __TBB_USE_GENERIC_FETCH_STORE 1 +// TODO: implement with __atomic_* builtins where available #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 diff --git a/include/tbb/parallel_for_each.h b/include/tbb/parallel_for_each.h index e0527dde6f..f3d64d89a0 100644 --- a/include/tbb/parallel_for_each.h +++ b/include/tbb/parallel_for_each.h @@ -22,6 +22,7 @@ #define 
__TBB_parallel_for_each_H #include "parallel_do.h" +#include "parallel_for.h" namespace tbb { @@ -29,16 +30,59 @@ namespace tbb { namespace internal { // The class calls user function in operator() template - class parallel_for_each_body : internal::no_assign { + class parallel_for_each_body_do : internal::no_assign { const Function &my_func; public: - parallel_for_each_body(const Function &_func) : my_func(_func) {} - parallel_for_each_body(const parallel_for_each_body &_caller) : my_func(_caller.my_func) {} + parallel_for_each_body_do(const Function &_func) : my_func(_func) {} - void operator() ( typename std::iterator_traits::reference value ) const { + void operator()(typename std::iterator_traits::reference value) const { my_func(value); } }; + + // The class calls user function in operator() + template + class parallel_for_each_body_for : internal::no_assign { + const Function &my_func; + public: + parallel_for_each_body_for(const Function &_func) : my_func(_func) {} + + void operator()(tbb::blocked_range range) const { +#if __INTEL_COMPILER +#pragma ivdep +#endif + for(Iterator it = range.begin(), end = range.end(); it != end; ++it) { + my_func(*it); + } + } + }; + + template + struct parallel_for_each_impl { +#if __TBB_TASK_GROUP_CONTEXT + static void doit(Iterator first, Iterator last, const Function& f, task_group_context &context) { + internal::parallel_for_each_body_do body(f); + tbb::parallel_do(first, last, body, context); + } +#endif + static void doit(Iterator first, Iterator last, const Function& f) { + internal::parallel_for_each_body_do body(f); + tbb::parallel_do(first, last, body); + } + }; + template + struct parallel_for_each_impl { +#if __TBB_TASK_GROUP_CONTEXT + static void doit(Iterator first, Iterator last, const Function& f, task_group_context &context) { + internal::parallel_for_each_body_for body(f); + tbb::parallel_for(tbb::blocked_range(first, last), body, context); + } +#endif + static void doit(Iterator first, Iterator last, const Function& f) { + internal::parallel_for_each_body_for body(f); + tbb::parallel_for(tbb::blocked_range(first, last), body); + } + }; } // namespace internal //! @endcond @@ -48,10 +92,9 @@ namespace internal { //! Calls function f for all items from [first, last) interval using user-supplied context /** @ingroup algorithms */ #if __TBB_TASK_GROUP_CONTEXT -template -void parallel_for_each(InputIterator first, InputIterator last, const Function& f, task_group_context &context) { - internal::parallel_for_each_body body(f); - tbb::parallel_do (first, last, body, context); +template +void parallel_for_each(Iterator first, Iterator last, const Function& f, task_group_context &context) { + internal::parallel_for_each_impl::iterator_category>::doit(first, last, f, context); } //! Calls function f for all items from rng using user-supplied context @@ -70,10 +113,9 @@ void parallel_for_each(const Range& rng, const Function& f, task_group_context& #endif /* __TBB_TASK_GROUP_CONTEXT */ //! Uses default context -template -void parallel_for_each(InputIterator first, InputIterator last, const Function& f) { - internal::parallel_for_each_body body(f); - tbb::parallel_do (first, last, body); +template +void parallel_for_each(Iterator first, Iterator last, const Function& f) { + internal::parallel_for_each_impl::iterator_category>::doit(first, last, f); } //! 
Uses default context diff --git a/include/tbb/task.h b/include/tbb/task.h index a416f3e27e..2c1029183c 100644 --- a/include/tbb/task.h +++ b/include/tbb/task.h @@ -901,7 +901,11 @@ class empty_task: public task { namespace internal { template class function_task : public task { +#if __TBB_ALLOW_MUTABLE_FUNCTORS F my_func; +#else + const F my_func; +#endif /*override*/ task* execute() { my_func(); return NULL; diff --git a/include/tbb/tbb_config.h b/include/tbb/tbb_config.h index 112c768b65..8c2226fe98 100644 --- a/include/tbb/tbb_config.h +++ b/include/tbb/tbb_config.h @@ -138,13 +138,12 @@ #endif #define __TBB_STATIC_ASSERT_PRESENT (__INTEL_CXX11_MODE__ || _MSC_VER >= 1600) #define __TBB_CPP11_TUPLE_PRESENT (_MSC_VER >= 1600 || (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300)) - /**Intel C++ compiler 14.0 crashes on using __has_include. When it fixed, condition will need to be updated. **/ #if (__clang__ && __INTEL_COMPILER > 1400) + /* Older versions of Intel Compiler do not have __has_include */ #if (__has_feature(__cxx_generalized_initializers__) && __has_include()) #define __TBB_INITIALIZER_LISTS_PRESENT 1 #endif #else - /** TODO: when MSVC2013 is supported by Intel C++ compiler, it will be enabled silently by compiler, so rule will need to be updated.**/ #define __TBB_INITIALIZER_LISTS_PRESENT __INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1400 && (_MSC_VER >= 1800 || __TBB_GCC_VERSION >= 40400 || _LIBCPP_VERSION) #endif @@ -206,7 +205,7 @@ #define __TBB_STATIC_ASSERT_PRESENT (_MSC_VER >= 1600) #define __TBB_CPP11_TUPLE_PRESENT (_MSC_VER >= 1600) #define __TBB_INITIALIZER_LISTS_PRESENT (_MSC_VER >= 1800) - #define __TBB_CONSTEXPR_PRESENT 0 + #define __TBB_CONSTEXPR_PRESENT (_MSC_VER >= 1900) #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (_MSC_VER >= 1800) #define __TBB_NOEXCEPT_PRESENT (_MSC_VER >= 1900) #define __TBB_CPP11_STD_BEGIN_END_PRESENT (_MSC_VER >= 1700) @@ -322,23 +321,21 @@ #endif #ifndef TBB_IMPLEMENT_CPP0X - /** By default, use C++11 classes if available **/ - #if __GNUC__==4 && __GNUC_MINOR__>=4 && __GXX_EXPERIMENTAL_CXX0X__ - #define TBB_IMPLEMENT_CPP0X 0 - #elif __clang__ && __cplusplus >= 201103L - //TODO: consider introducing separate macros for each file? - //prevent injection of corresponding tbb names into std:: namespace if native headers are present - #if __has_include() || __has_include() - #define TBB_IMPLEMENT_CPP0X 0 +/** By default, use C++11 classes if available **/ + #if __clang__ + /* Old versions of Intel Compiler do not have __has_include */ + #if (__INTEL_COMPILER && __INTEL_COMPILER <= 1400) + #define TBB_IMPLEMENT_CPP0X !(_LIBCPP_VERSION && (__cplusplus >= 201103L)) #else - #define TBB_IMPLEMENT_CPP0X 1 + #define TBB_IMPLEMENT_CPP0X (__cplusplus < 201103L || (!__has_include() && !__has_include())) #endif - #elif _MSC_VER>=1700 - #define TBB_IMPLEMENT_CPP0X 0 - #elif __STDCPP_THREADS__ - #define TBB_IMPLEMENT_CPP0X 0 + #elif __GNUC__ + #define TBB_IMPLEMENT_CPP0X (__TBB_GCC_VERSION < 40400 || !__GXX_EXPERIMENTAL_CXX0X__) + #elif _MSC_VER + #define TBB_IMPLEMENT_CPP0X (_MSC_VER < 1700) #else - #define TBB_IMPLEMENT_CPP0X 1 + // TODO: Reconsider general approach to be more reliable, e.g. 
(!(__cplusplus >= 201103L && __ STDC_HOSTED__)) + #define TBB_IMPLEMENT_CPP0X (!__STDCPP_THREADS__) #endif #endif /* TBB_IMPLEMENT_CPP0X */ @@ -628,7 +625,7 @@ #define __TBB_FORCE_64BIT_ALIGNMENT_BROKEN 0 #endif -#if __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && __TBB_GCC_VERSION < 40700 && !defined(__INTEL_COMPILER) && !defined (__clang__) +#if __GNUC__ && !__INTEL_COMPILER && !__clang__ && __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && __TBB_GCC_VERSION < 40700 #define __TBB_ZERO_INIT_WITH_DEFAULTED_CTOR_BROKEN 1 #endif diff --git a/include/tbb/tbb_stddef.h b/include/tbb/tbb_stddef.h index a1bc9b0629..fb66716801 100644 --- a/include/tbb/tbb_stddef.h +++ b/include/tbb/tbb_stddef.h @@ -26,7 +26,7 @@ #define TBB_VERSION_MINOR 4 // Engineering-focused interface version -#define TBB_INTERFACE_VERSION 9000 +#define TBB_INTERFACE_VERSION 9001 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 // The oldest major interface version still supported diff --git a/index.src.html b/index.html similarity index 71% rename from index.src.html rename to index.html index 1f815e53b7..d4692c7822 100644 --- a/index.src.html +++ b/index.html @@ -1,33 +1,41 @@ -

Overview

Top level directory for Intel® Threading Building Blocks. +

Common directories

+
+
doc +
Documentation for the library. +
include +
Include files required for compiling code that uses the library. +
examples +
Examples of how to use the library. +
+

Intel TBB source package

To build Intel TBB, use the top-level Makefile; see also the build directions. To port Intel TBB to a new platform, operating system or architecture, see the porting directions.

- -

Files

+

Files

Makefile -
Top-level Makefile for Intel TBB. See also the build directions. +
Top-level Makefile for Intel TBB. See also the build directions.
- -

Directories

+

Directories

-
doc -
Documentation for the library. -
include -
Include files required for compiling code that uses the library. -
examples -
Examples of how to use the library.
src
Source code for the library.
build
Internal Makefile infrastructure for Intel TBB. Do not use directly; see the build directions.
- +

Intel TBB binary package

+

Directories

+
+
bin +
Start-up scripts for sourcing library for Linux* OS and OS X*. For Windows* OS: start-up scripts and dynamic-link libraries. +
lib +
Platform-specific binary files for the library. +

Copyright © 2005-2015 Intel Corporation. All Rights Reserved. @@ -38,4 +46,3 @@

Directories

* Other names and brands may be claimed as the property of others. - diff --git a/jni/Application.mk b/jni/Application.mk index feee666c27..11c18fc82e 100644 --- a/jni/Application.mk +++ b/jni/Application.mk @@ -35,11 +35,11 @@ endif endif endif -APP_PLATFORM:=android-20 +APP_PLATFORM:=android-21 NDK_TOOLCHAIN_VERSION:=4.9 # Intel(R) C++ Compiler does not support ndk r10 version yet. -ifeq (icc,$(compiler)) +ifeq (iccx86,$(compiler)$(APP_ABI)) APP_PLATFORM:=android-9 NDK_TOOLCHAIN_VERSION:=4.8 endif diff --git a/src/perf/time_hash_map_fill.cpp b/src/perf/time_hash_map_fill.cpp index 13efc41362..a79fbcda24 100644 --- a/src/perf/time_hash_map_fill.cpp +++ b/src/perf/time_hash_map_fill.cpp @@ -108,8 +108,8 @@ struct Uniques : TesterBase { // Executes test mode for a given thread. Return value is ignored when used with timing wrappers. /*override*/ double test(int testn, int t) { - if( testn != 1 ) { // do insertions - for(int i = testn*value+t*n_items, e = testn*value+(t+1)*n_items; i < e; i++) { + if( testn == 0 ) { // do insertions + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { Table.insert( std::make_pair(Data[i],t) ); } } else { // do last finds @@ -138,7 +138,7 @@ void execute_percent(test_sandbox &the_test, int p) { int uniques = p==100?std::numeric_limits::max() : MAX_TABLE_SIZE; ASSERT(p==100 || p <= 30, "Function is broken for %% > 30 except for 100%%"); for(int i = 0; i < input_size; i++) - Data[i] = rand()%uniques; + Data[i] = (rand()*rand())%uniques; for(int t = MinThread; t <= MaxThread; t++) the_test.factory(input_size, t); // executes the tests specified in BOX-es for given 'value' and threads the_test.report.SetRoundTitle(rounds++, "%d%%", p); diff --git a/src/perf/time_parallel_for_each.cpp b/src/perf/time_parallel_for_each.cpp new file mode 100644 index 0000000000..15a814578e --- /dev/null +++ b/src/perf/time_parallel_for_each.cpp @@ -0,0 +1,70 @@ +/* + Copyright 2005-2015 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. Threading Building Blocks is free software; + you can redistribute it and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. Threading Building Blocks is + distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. You should have received a copy of + the GNU General Public License along with Threading Building Blocks; if not, write to the + Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software library without + restriction. Specifically, if other files instantiate templates or use macros or inline + functions from this file, or you compile this file and link it with other files to produce + an executable, this file does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General Public License. 
+*/ + +#include +#include +#include +#include +#include +#include + +#include "tbb/parallel_for_each.h" +#include "tbb/tick_count.h" + +template +void foo( Type &f ) { + f += 1.0f; +} + +template +void test( std::string testName, const int N, const int numRepeats ) { + typedef typename Container::value_type Type; + Container v; + + for ( int i = 0; i < N; ++i ) { + v.push_back( static_cast(std::rand()) ); + } + + std::vector times; + times.reserve( numRepeats ); + + for ( int i = 0; i < numRepeats; ++i ) { + tbb::tick_count t0 = tbb::tick_count::now(); + tbb::parallel_for_each( v.begin(), v.end(), foo ); + tbb::tick_count t1 = tbb::tick_count::now(); + times.push_back( (t1 - t0).seconds()*1000 ); + } + + std::sort( times.begin(), times.end() ); + std::cout << "Test " << testName << std::endl + << "min " << times[times.size() / 20] << " ms " << std::endl + << "med " << times[times.size() / 2] << " ms " << std::endl + << "max " << times[times.size() - times.size() / 20 - 1] << " ms " << std::endl; +} + +int main( int argc, char* argv[] ) { + const int N = argc > 1 ? std::atoi( argv[1] ) : 10 * 1000; + const int numRepeats = argc > 2 ? std::atoi( argv[2] ) : 10; + + test< std::vector >( "std::vector", N, numRepeats ); + test< std::list >( "std::list", N / 100, numRepeats ); + + return 0; +} diff --git a/src/rml/test/test_rml_mixed.cpp b/src/rml/test/test_rml_mixed.cpp index 8636c3e35c..3b2b06a41d 100644 --- a/src/rml/test/test_rml_mixed.cpp +++ b/src/rml/test/test_rml_mixed.cpp @@ -38,7 +38,7 @@ int TestMain () { // non-deterministic. Thus dynamic_link fails on some systems when the // application changes its current directory after the library (TBB/OpenMP/...) // is loaded but before the static constructors in the library are executed. -#define CHDIR_SUPPORT_BROKEN ( ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && __GNUC_MINOR__ <= 9 ) || (__linux__ && __clang_major__ == 3 && __clang_minor__ == 5) ) +#define CHDIR_SUPPORT_BROKEN ( (__TBB_GCC_VERSION >= 40600 && __TBB_GCC_VERSION < 50200) || (__linux__ && __TBB_CLANG_VERSION == 30500) ) const int OMP_ParallelRegionSize = 16; int TBB_MaxThread = 4; // Includes master diff --git a/src/tbb/arena.cpp b/src/tbb/arena.cpp index 1fdaf89826..29a7e81df3 100644 --- a/src/tbb/arena.cpp +++ b/src/tbb/arena.cpp @@ -42,6 +42,11 @@ void generic_scheduler::attach_arena( arena* a, size_t index, bool is_master ) { my_arena_index = index; my_arena_slot = a->my_slots + index; attach_mailbox( affinity_id(index+1) ); + if ( is_master && my_inbox.is_idle_state( true ) ) { + // Master enters an arena with its own task to be executed. It means that master is not + // going to enter stealing loop and take affinity tasks. + my_inbox.set_is_idle( false ); + } #if __TBB_TASK_GROUP_CONTEXT // Context to be used by root tasks by default (if the user has not specified one). 
if( !is_master ) @@ -62,31 +67,54 @@ void generic_scheduler::attach_arena( arena* a, size_t index, bool is_master ) { #endif /* __TBB_TASK_PRIORITY */ } +inline static bool occupy_slot( generic_scheduler*& slot, generic_scheduler& s ) { + return !slot && as_atomic( slot ).compare_and_swap( &s, NULL ) == NULL; +} + +size_t arena::occupy_free_slot_in_range( generic_scheduler& s, size_t lower, size_t upper ) { + if ( lower >= upper ) return out_of_arena; + // Start search for an empty slot from the one we occupied the last time + size_t index = s.my_arena_index; + if ( index < lower || index >= upper ) index = s.my_random.get() % (upper - lower) + lower; + __TBB_ASSERT( index >= lower && index < upper, NULL ); + // Find a free slot + for ( size_t i = index; i < upper; ++i ) + if ( occupy_slot(my_slots[i].my_scheduler, s) ) return i; + for ( size_t i = lower; i < index; ++i ) + if ( occupy_slot(my_slots[i].my_scheduler, s) ) return i; + return out_of_arena; +} + +template +size_t arena::occupy_free_slot( generic_scheduler& s ) { + // Firstly, masters try to occupy reserved slots + size_t index = as_worker ? out_of_arena : occupy_free_slot_in_range( s, 0, my_num_reserved_slots ); + if ( index == out_of_arena ) { + // Secondly, all threads try to occupy all non-reserved slots + index = occupy_free_slot_in_range( s, my_num_reserved_slots, my_num_slots ); + // Likely this arena is already saturated + if ( index == out_of_arena ) + return out_of_arena; + } + + ITT_NOTIFY(sync_acquired, my_slots + index); + atomic_update( my_limit, (unsigned)(index + 1), std::less() ); + return index; +} + void arena::process( generic_scheduler& s ) { __TBB_ASSERT( is_alive(my_guard), NULL ); __TBB_ASSERT( governor::is_set(&s), NULL ); __TBB_ASSERT( !s.my_innermost_running_task, NULL ); __TBB_ASSERT( !s.my_dispatching_task, NULL ); - __TBB_ASSERT( my_num_slots != 1, NULL ); - // Start search for an empty slot from the one we occupied the last time - unsigned index = s.my_arena_index < my_num_slots ? s.my_arena_index : s.my_random.get() % (my_num_slots - 1) + 1, - end = index; - __TBB_ASSERT( index != 0, "A worker cannot occupy slot 0" ); - __TBB_ASSERT( index < my_num_slots, NULL ); + __TBB_ASSERT( my_num_slots > 1, NULL ); - // Find a vacant slot - for ( ;; ) { - if ( !my_slots[index].my_scheduler && as_atomic(my_slots[index].my_scheduler).compare_and_swap(&s, NULL ) == NULL ) - break; - if ( ++index == my_num_slots ) - index = 1; - if ( index == end ) { - // Likely this arena is already saturated - goto quit; - } - } - ITT_NOTIFY(sync_acquired, my_slots + index); + size_t index = occupy_free_slot( s ); + if ( index == out_of_arena ) + goto quit; + + __TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" ); s.attach_arena( this, index, /*is_master*/false ); #if !__TBB_FP_CONTEXT @@ -98,13 +126,27 @@ void arena::process( generic_scheduler& s ) { my_observers.notify_entry_observers( s.my_last_local_observer, /*worker=*/true ); #endif /* __TBB_SCHEDULER_OBSERVER */ - atomic_update( my_limit, index + 1, std::less() ); + // Task pool can be marked as non-empty if the worker occupies the slot left by a master. 
+ if ( s.my_arena_slot->task_pool != EmptyTaskPool ) { + __TBB_ASSERT( !s.my_innermost_running_task, NULL ); + __TBB_ASSERT( !s.my_dispatching_task, NULL ); + __TBB_ASSERT( s.my_inbox.is_idle_state(false), NULL ); + s.local_wait_for_all( *s.my_dummy_task, NULL ); + __TBB_ASSERT( s.my_inbox.is_idle_state(true), NULL ); + } for ( ;; ) { + __TBB_ASSERT( is_alive(my_guard), NULL ); + __TBB_ASSERT ( __TBB_load_relaxed(s.my_arena_slot->head) == __TBB_load_relaxed(s.my_arena_slot->tail), + "Worker cannot leave arena while its task pool is not empty" ); + __TBB_ASSERT( s.my_arena_slot->task_pool == EmptyTaskPool, "Empty task pool is not marked appropriately" ); + // This check prevents relinquishing more than necessary workers because + // of the non-atomicity of the decision making procedure + if (num_workers_active() > my_num_workers_allotted) + break; // Try to steal a task. // Passing reference count is technically unnecessary in this context, // but omitting it here would add checks inside the function. - __TBB_ASSERT( is_alive(my_guard), NULL ); task* t = s.receive_or_steal_task( s.my_dummy_task->prefix().ref_count ); if (t) { // A side effect of receive_or_steal_task is that my_innermost_running_task can be set. @@ -113,13 +155,6 @@ void arena::process( generic_scheduler& s ) { __TBB_ASSERT( !s.my_dispatching_task, NULL ); s.local_wait_for_all(*s.my_dummy_task,t); } - __TBB_ASSERT ( __TBB_load_relaxed(s.my_arena_slot->head) == __TBB_load_relaxed(s.my_arena_slot->tail), - "Worker cannot leave arena while its task pool is not empty" ); - __TBB_ASSERT( s.my_arena_slot->task_pool == EmptyTaskPool, "Empty task pool is not marked appropriately" ); - // This check prevents relinquishing more than necessary workers because - // of the non-atomicity of the decision making procedure - if (num_workers_active() > my_num_workers_allotted) - break; } #if __TBB_SCHEDULER_OBSERVER my_observers.notify_exit_observers( s.my_last_local_observer, /*worker=*/true ); @@ -148,7 +183,7 @@ void arena::process( generic_scheduler& s ) { on_thread_leaving(); } -arena::arena ( market& m, unsigned num_slots ) { +arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots ) { __TBB_ASSERT( !my_guard, "improperly allocated arena?" ); __TBB_ASSERT( sizeof(my_slots[0]) % NFS_GetLineSize()==0, "arena::slot size not multiple of cache line size" ); __TBB_ASSERT( (uintptr_t)this % NFS_GetLineSize()==0, "arena misaligned" ); @@ -159,7 +194,8 @@ arena::arena ( market& m, unsigned num_slots ) { my_limit = 1; // Two slots are mandatory: for the master, and for 1 worker (required to support starvation resistant tasks). my_num_slots = num_slots_to_reserve(num_slots); - my_max_num_workers = num_slots-1; + my_num_reserved_slots = num_reserved_slots; + my_max_num_workers = num_slots-num_reserved_slots; my_references = 1; // accounts for the master #if __TBB_TASK_PRIORITY my_bottom_priority = my_top_priority = normalized_normal_priority; @@ -168,7 +204,7 @@ arena::arena ( market& m, unsigned num_slots ) { #if __TBB_SCHEDULER_OBSERVER my_observers.my_arena = this; #endif /* __TBB_SCHEDULER_OBSERVER */ - __TBB_ASSERT ( my_max_num_workers < my_num_slots, NULL ); + __TBB_ASSERT ( my_max_num_workers <= my_num_slots, NULL ); // Construct slots. Mark internal synchronization elements for the tools. 
for( unsigned i = 0; i < my_num_slots; ++i ) { __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, NULL ); @@ -190,7 +226,7 @@ arena::arena ( market& m, unsigned num_slots ) { #endif } -arena& arena::allocate_arena( market& m, unsigned num_slots ) { +arena& arena::allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots ) { __TBB_ASSERT( sizeof(base_type) + sizeof(arena_slot) == sizeof(arena), "All arena data fields must go to arena_base" ); __TBB_ASSERT( sizeof(base_type) % NFS_GetLineSize() == 0, "arena slots area misaligned: wrong padding" ); __TBB_ASSERT( sizeof(mail_outbox) == NFS_MaxLineSize, "Mailbox padding is wrong" ); @@ -198,7 +234,7 @@ arena& arena::allocate_arena( market& m, unsigned num_slots ) { unsigned char* storage = (unsigned char*)NFS_Allocate( 1, n, NULL ); // Zero all slots to indicate that they are empty memset( storage, 0, n ); - return *new( storage + num_slots_to_reserve(num_slots) * sizeof(mail_outbox) ) arena(m, num_slots); + return *new( storage + num_slots_to_reserve(num_slots) * sizeof(mail_outbox) ) arena(m, num_slots, num_reserved_slots); } void arena::free_arena () { @@ -524,19 +560,18 @@ struct nested_arena_context : no_copy { generic_scheduler &my_scheduler; scheduler_state const my_orig_state; void *my_orig_ptr; - bool my_adjusting; - nested_arena_context(generic_scheduler *s, arena* a, bool needs_adjusting, bool as_worker = false) - : my_scheduler(*s), my_orig_state(*s), my_orig_ptr(NULL), my_adjusting(needs_adjusting) { - s->nested_arena_entry(a, *this, as_worker); + nested_arena_context(generic_scheduler *s, arena* a, size_t slot_index, bool as_worker) - : my_scheduler(*s), my_orig_state(*s), my_orig_ptr(NULL) { + s->nested_arena_entry(a, slot_index, *this, as_worker); } ~nested_arena_context() { my_scheduler.nested_arena_exit(*this); - (scheduler_state&)my_scheduler = my_orig_state; // restore arena settings + static_cast<scheduler_state&>(my_scheduler) = my_orig_state; // restore arena settings governor::assume_scheduler( &my_scheduler ); } }; -void generic_scheduler::nested_arena_entry(arena* a, nested_arena_context& c, bool as_worker) { __TBB_ASSERT( is_alive(a->my_guard), NULL ); if( a == my_arena ) { #if __TBB_TASK_GROUP_CONTEXT @@ -551,8 +586,10 @@ void generic_scheduler::nested_arena_entry(arena* a, nested_arena_context& c, bo my_arena->orphan_offloaded_tasks( *this ); my_offloaded_tasks = NULL; #endif /* __TBB_TASK_PRIORITY */ - attach_arena( a, /*index*/0, /*is_master*/true ); + attach_arena( a, slot_index, /*is_master*/true ); + __TBB_ASSERT( my_arena == a, NULL ); my_innermost_running_task = my_dispatching_task = as_worker? NULL : my_dummy_task; + my_is_worker = as_worker; #if __TBB_TASK_GROUP_CONTEXT // save dummy's context and replace it by arena's context c.my_orig_ptr = my_dummy_task->prefix().context; @@ -565,9 +602,8 @@ void generic_scheduler::nested_arena_entry(arena* a, nested_arena_context& c, bo #endif // TODO? ITT_NOTIFY(sync_acquired, a->my_slots + index); // TODO: it requires market to have P workers (not P-1) - // TODO: it still allows temporary oversubscription by 1 worker (due to my_max_num_workers) // TODO: a preempted worker should be excluded from assignment to other arenas e.g.
my_slack-- - if( c.my_adjusting ) my_arena->my_market->adjust_demand(*my_arena, -1); + if( !as_worker && slot_index >= my_arena->my_num_reserved_slots ) my_arena->my_market->adjust_demand(*my_arena, -1); } void generic_scheduler::nested_arena_exit(nested_arena_context& c) { @@ -577,7 +613,7 @@ void generic_scheduler::nested_arena_exit(nested_arena_context& c) { #endif return; } - if( c.my_adjusting ) my_arena->my_market->adjust_demand(*my_arena, 1); + if( !my_is_worker && my_arena_index >= my_arena->my_num_reserved_slots ) my_arena->my_market->adjust_demand(*my_arena, 1); #if __TBB_ARENA_OBSERVER my_arena->my_observers.notify_exit_observers( my_last_local_observer, /*worker=*/false ); #endif /* __TBB_SCHEDULER_OBSERVER */ @@ -587,8 +623,9 @@ void generic_scheduler::nested_arena_exit(nested_arena_context& c) { my_arena->orphan_offloaded_tasks( *this ); my_local_reload_epoch = *c.my_orig_state.my_ref_reload_epoch; #endif - // Free the master slot. TODO: support multiple masters - __TBB_store_with_release(my_arena->my_slots[0].my_scheduler, (generic_scheduler*)NULL); + // Free the master slot. + __TBB_ASSERT(my_arena->my_slots[my_arena_index].my_scheduler, "A slot is already empty"); + __TBB_store_with_release(my_arena->my_slots[my_arena_index].my_scheduler, (generic_scheduler*)NULL); my_arena->my_exit_monitors.notify_all_relaxed(); // TODO: fix concurrent monitor to use notify_one (test MultipleMastersPart4 fails) #if __TBB_TASK_GROUP_CONTEXT // restore context of dummy task @@ -617,14 +654,13 @@ namespace internal { void task_arena_base::internal_initialize( ) { governor::one_time_init(); - __TBB_ASSERT( my_master_slots <= 1, "Number of slots reserved for master can be only [0,1]"); - if( my_master_slots > 1 ) my_master_slots = 1; // TODO: make more masters bool default_concurrency_requested = false; if( my_max_concurrency < 1 ) { my_max_concurrency = (int)governor::default_num_threads(); default_concurrency_requested = true; } - arena* new_arena = market::create_arena( my_max_concurrency + 1-my_master_slots/*it's +1 slot for num_masters=0*/, + __TBB_ASSERT( my_master_slots <= (unsigned)my_max_concurrency, "Number of slots reserved for master should not exceed arena concurrency"); + arena* new_arena = market::create_arena( my_max_concurrency, my_master_slots, global_control::active_value(global_control::thread_stack_size), default_concurrency_requested ); // increases market's ref count for task_arena @@ -739,16 +775,15 @@ void task_arena_base::internal_execute( internal::delegate_base& d) const { generic_scheduler* s = governor::local_scheduler_weak(); __TBB_ASSERT(s, "Scheduler is not initialized"); // TODO: is it safe to assign slot to a scheduler which is not yet switched? - // TODO TEMP: one master, make more masters - if( s->my_arena == my_arena || (!__TBB_load_with_acquire(my_arena->my_slots[0].my_scheduler) - && as_atomic(my_arena->my_slots[0].my_scheduler).compare_and_swap(s, NULL ) == NULL) ) { + size_t index1 = s->my_arena == my_arena ? s->my_arena_index : my_arena->occupy_free_slot</*as_worker*/false>( *s ); + if ( index1 != arena::out_of_arena ) { cpu_ctl_env_helper cpu_ctl_helper; cpu_ctl_helper.set_env( __TBB_CONTEXT_ARG1(my_context) ); #if TBB_USE_EXCEPTIONS try { #endif //TODO: replace dummy tasks for workers as well to avoid using of the_dummy_context - nested_arena_context scope(s, my_arena, !my_master_slots); + nested_arena_context scope( s, my_arena, index1, /*as_worker*/false ); d(); #if TBB_USE_EXCEPTIONS } catch(...)
{ @@ -782,19 +817,18 @@ void task_arena_base::internal_execute( internal::delegate_base& d) const { my_arena->my_exit_monitors.cancel_wait(waiter); break; } - else if( !__TBB_load_with_acquire(my_arena->my_slots[0].my_scheduler) // TODO: refactor into a function? - && as_atomic(my_arena->my_slots[0].my_scheduler).compare_and_swap(s, NULL ) == NULL ) { + size_t index2 = my_arena->occupy_free_slot</*as_worker*/false>( *s ); + if( index2 != arena::out_of_arena ) { my_arena->my_exit_monitors.cancel_wait(waiter); - nested_arena_context scope(s, my_arena, !my_master_slots); + nested_arena_context scope(s, my_arena, index2, /*as_worker*/false); s->local_wait_for_all(root, NULL); #if TBB_USE_EXCEPTIONS __TBB_ASSERT( !exec_context.my_exception, NULL ); // exception can be thrown above, not deferred #endif __TBB_ASSERT( root.prefix().ref_count == 0, NULL ); break; - } else { - my_arena->my_exit_monitors.commit_wait(waiter); } + my_arena->my_exit_monitors.commit_wait(waiter); } while( __TBB_load_with_acquire(root.prefix().ref_count) == 2 ); #if TBB_USE_EXCEPTIONS // process possible exception @@ -811,8 +845,14 @@ class wait_task : public task { /*override*/ task* execute() { generic_scheduler* s = governor::local_scheduler_if_initialized(); __TBB_ASSERT( s, NULL ); - if( s->my_arena_index && s->worker_outermost_level() ) { - s->local_wait_for_all( *s->my_dummy_task, NULL ); // run remaining tasks + __TBB_ASSERT( s->master_outermost_level() || s->worker_outermost_level(), "The enqueued task can be processed only on outermost level" ); + if( s->is_worker() ) { + __TBB_ASSERT( !s->my_dispatching_task && s->my_innermost_running_task == this, NULL ); + // Mimic worker on outermost level to run remaining tasks + s->my_innermost_running_task = NULL; + s->local_wait_for_all( *s->my_dummy_task, NULL ); + __TBB_ASSERT( !s->my_dispatching_task && !s->my_innermost_running_task, NULL ); + s->my_innermost_running_task = this; } else s->my_arena->is_out_of_work(); // avoids starvation of internal_wait: issuing this task makes arena full my_signal.V(); return NULL; @@ -836,7 +876,7 @@ void task_arena_base::internal_wait() const { while( my_arena->my_pool_state != arena::SNAPSHOT_EMPTY ) { if( !__TBB_load_with_acquire(my_arena->my_slots[0].my_scheduler) // TODO TEMP: one master, make more masters && as_atomic(my_arena->my_slots[0].my_scheduler).compare_and_swap(s, NULL) == NULL ) { - nested_arena_context a(s, my_arena, !my_master_slots, true); + nested_arena_context a(s, my_arena, 0, true); s->wait_until_empty(); } else { binary_semaphore waiter; // TODO: replace by a single event notification from is_out_of_work diff --git a/src/tbb/arena.h b/src/tbb/arena.h index 7ee560afa7..4189588898 100644 --- a/src/tbb/arena.h +++ b/src/tbb/arena.h @@ -62,7 +62,7 @@ struct arena_base : padded { volatile intptr_t my_top_priority; // heavy use in stealing loop #endif /* !__TBB_TASK_PRIORITY */ - //! Maximal currently busy slot. + //! Maximal number of currently busy slots. atomic<unsigned> my_limit; // heavy use in stealing loop //! Task pool for the tasks scheduled via task::enqueue() method @@ -139,6 +139,9 @@ struct arena_base : padded { //! Number of slots in the arena unsigned my_num_slots; + //! Number of reserved slots (can be occupied only by masters) + unsigned my_num_reserved_slots; + //! Indicates if there is an oversubscribing worker created to service enqueued tasks. bool my_mandatory_concurrency; @@ -159,10 +162,10 @@ class arena: public padded<arena_base> typedef padded<arena_base> base_type; //!
Constructor - arena ( market&, unsigned max_num_workers ); + arena ( market&, unsigned max_num_workers, unsigned num_reserved_slots ); //! Allocate an instance of arena. - static arena& allocate_arena( market&, unsigned num_slots ); + static arena& allocate_arena( market&, unsigned num_slots, unsigned num_reserved_slots ); static int unsigned num_slots_to_reserve ( unsigned num_slots ) { return max(2u, num_slots); @@ -235,6 +238,13 @@ class arena: public padded<arena_base> intptr_t workers_task_node_count(); #endif + static const size_t out_of_arena = ~size_t(0); + //! Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available, returns out_of_arena. + template <bool as_worker> + size_t occupy_free_slot( generic_scheduler& s ); + //! Tries to occupy a slot in the specified range. + size_t occupy_free_slot_in_range( generic_scheduler& s, size_t lower, size_t upper ); + /** Must be the last data field */ arena_slot my_slots[1]; }; // class arena diff --git a/src/tbb/cache_aligned_allocator.cpp b/src/tbb/cache_aligned_allocator.cpp index 50054653aa..79343f5e2b 100644 --- a/src/tbb/cache_aligned_allocator.cpp +++ b/src/tbb/cache_aligned_allocator.cpp @@ -181,23 +181,23 @@ size_t NFS_GetLineSize() { void* NFS_Allocate( size_t n, size_t element_size, void* /*hint*/ ) { //TODO: make this functionality available via an adaptor over generic STL like allocator - size_t cache_line_size = NFS_LineSize; - __TBB_ASSERT( cache_line_size <= NFS_MaxLineSize, "illegal value for NFS_LineSize" ); - __TBB_ASSERT( is_power_of_two(cache_line_size), "must be power of two" ); + const size_t nfs_cache_line_size = NFS_LineSize; + __TBB_ASSERT( nfs_cache_line_size <= NFS_MaxLineSize, "illegal value for NFS_LineSize" ); + __TBB_ASSERT( is_power_of_two(nfs_cache_line_size), "must be power of two" ); size_t bytes = n*element_size; - if (bytes() != segment_allocated()) - throw_exception(eid_bad_last_alloc); // throw custom exception, because it's hard to recover correctly after segment_allocation_failed state + enforce_segment_allocated(s.load()); //it's hard to recover correctly after segment_allocation_failed state + return s; } @@ -196,8 +195,7 @@ namespace internal { const void *arg; safe_init_body(internal_array_op2 init, const void *src) : func(init), arg(src) {} void operator()(segment_t &s, void *begin, size_type n) const { - if(s.load() != segment_allocated()) - throw_exception(eid_bad_last_alloc); // throw custom exception + enforce_segment_allocated(s.load()); func( begin, arg, n ); } }; @@ -280,10 +278,11 @@ concurrent_vector_base_v3::size_type concurrent_vector_base_v3::helper::enable_s array0 = s[0].load(); } ITT_NOTIFY(sync_acquired, &s[0]); - if(array0 != segment_allocated()) { // check for segment_allocation_failed state of initial segment - publish_segment(s[k], segment_allocation_failed()); // and assign segment_allocation_failed state here - throw_exception(eid_bad_last_alloc); // throw custom exception - } + + segment_scope_guard k_segment_guard(s[k], false); + enforce_segment_allocated(array0); // initial segment should be allocated + k_segment_guard.dismiss(); + publish_segment( s[k], static_cast(array0.pointer() + segment_base(k)*element_size ) ); @@ -399,8 +398,7 @@ void concurrent_vector_base_v3::internal_assign( const concurrent_vector_base_v3 size_type b=segment_base(k); size_type new_end = b>=n ?
b : n; __TBB_ASSERT( my_early_size>new_end, NULL ); - if( my_segment[k].load() != segment_allocated()) // check vector was broken before - throw_exception(eid_bad_last_alloc); // throw custom exception + enforce_segment_allocated(my_segment[k].load()); //if vector was broken before // destructors are supposed to not throw any exceptions destroy( my_segment[k].load().pointer() + element_size*(new_end-b), my_early_size-new_end ); my_early_size = new_end; @@ -417,8 +415,8 @@ void concurrent_vector_base_v3::internal_assign( const concurrent_vector_base_v3 helper::extend_table_if_necessary(*this, k, 0); if( my_segment[k].load() == segment_not_used()) helper::enable_segment(*this, k, element_size); - else if( my_segment[k].load() != segment_allocated() ) - throw_exception(eid_bad_last_alloc); // throw custom exception + else + enforce_segment_allocated(my_segment[k].load()); size_type m = k? segment_size(k) : 2; if( m > n-b ) m = n-b; size_type a = 0; @@ -475,8 +473,7 @@ concurrent_vector_base_v3::size_type concurrent_vector_base_v3::internal_grow_to backoff.pause(); ITT_NOTIFY(sync_acquired, &s); } - if( my_segment[i].load() != segment_allocated() ) - throw_exception(eid_bad_last_alloc); + enforce_segment_allocated(my_segment[i].load()); } #if TBB_USE_DEBUG size_type capacity = internal_capacity(); diff --git a/src/tbb/governor.cpp b/src/tbb/governor.cpp index e12f266d71..50710a66a3 100644 --- a/src/tbb/governor.cpp +++ b/src/tbb/governor.cpp @@ -180,30 +180,30 @@ generic_scheduler* governor::init_scheduler_weak() { generic_scheduler* governor::init_scheduler( int num_threads, stack_size_type stack_size, bool auto_init ) { one_time_init(); - uintptr_t v = theTLS.get(); - generic_scheduler* s = tls_scheduler_of( v ); - if( v&1 ) { // TLS holds scheduler instance with arena - __TBB_ASSERT( s->my_arena, "TLS is marked for scheduler with arena" ); - s->my_ref_count += 1; - return s; - } - if( v ) { //TLS holds scheduler instance without arena - __TBB_ASSERT( !s->my_arena, "TLS is marked for scheduler without arena" ); - __TBB_ASSERT( s->my_auto_initialized, "weakly initialized scheduler is supposed to be auto-initialized" ); - s->attach_arena( market::create_arena( default_num_threads(), 0, true ), 0, /*is_master*/true ); - __TBB_ASSERT( s->my_arena_index == 0, "Master thread must occupy the first slot in its arena" ); - s->my_arena_slot->my_scheduler = s; - s->my_arena->my_default_ctx = s->default_context(); // it also transfers implied ownership - s->my_ref_count += 1; - assume_scheduler( s ); + if ( uintptr_t v = theTLS.get() ) { + generic_scheduler* s = tls_scheduler_of( v ); + if ( (v&1) == 0 ) { // TLS holds scheduler instance without arena + __TBB_ASSERT( s->my_ref_count == 1, "weakly initialized scheduler must have refcount equal to 1" ); + __TBB_ASSERT( !s->my_arena, "weakly initialized scheduler must have no arena" ); + __TBB_ASSERT( s->my_auto_initialized, "weakly initialized scheduler is supposed to be auto-initialized" ); + s->attach_arena( market::create_arena( default_num_threads(), 1, 0, true ), 0, /*is_master*/true ); + __TBB_ASSERT( s->my_arena_index == 0, "Master thread must occupy the first slot in its arena" ); + s->my_arena_slot->my_scheduler = s; + s->my_arena->my_default_ctx = s->default_context(); // it also transfers implied ownership + // Mark the scheduler as fully initialized + assume_scheduler( s ); + } + // Increment refcount only for explicit instances of task_scheduler_init. 
+ if ( !auto_init ) s->my_ref_count += 1; + __TBB_ASSERT( s->my_arena, "scheduler is not initialized fully" ); return s; } // Create new scheduler instance with arena bool default_concurrency_requested = num_threads == task_scheduler_init::automatic; if( default_concurrency_requested ) num_threads = default_num_threads(); - arena *a = market::create_arena( num_threads, stack_size, default_concurrency_requested ); - s = generic_scheduler::create_master( a ); + arena *a = market::create_arena( num_threads, 1, stack_size, default_concurrency_requested ); + generic_scheduler* s = generic_scheduler::create_master( a ); __TBB_ASSERT(s, "Somehow a local scheduler creation for a master thread failed"); __TBB_ASSERT( is_set(s), NULL ); s->my_auto_initialized = auto_init; @@ -214,20 +214,24 @@ void governor::terminate_scheduler( generic_scheduler* s, const task_scheduler_i __TBB_ASSERT( is_set(s), "Attempt to terminate non-local scheduler instance" ); if (--(s->my_ref_count)) { // can't throw exception, because this is on dtor's call chain - __TBB_ASSERT_RELEASE( !BlockingTSI || BlockingTSI!=tsi_ptr, + __TBB_ASSERT_RELEASE( BlockingTSI!=tsi_ptr, "Attempt to terminate nested scheduler in blocking mode" ); } else { + bool needs_wait_workers = false; + if ( BlockingTSI==tsi_ptr ) { + needs_wait_workers = true; + BlockingTSI = NULL; #if TBB_USE_ASSERT - if (BlockingTSI) { - __TBB_ASSERT( BlockingTSI == tsi_ptr, "For blocking termination last terminate_scheduler must be blocking." ); IsBlockingTerminationInProgress = true; - } #endif - s->cleanup_master(); + } + s->cleanup_master( needs_wait_workers ); __TBB_ASSERT( is_set(NULL), "cleanup_master has not cleared its TLS slot" ); - BlockingTSI = NULL; #if TBB_USE_ASSERT - IsBlockingTerminationInProgress = false; + if ( needs_wait_workers ) { + __TBB_ASSERT( IsBlockingTerminationInProgress, NULL ); + IsBlockingTerminationInProgress = false; + } #endif } } @@ -236,12 +240,11 @@ void governor::auto_terminate(void* arg){ generic_scheduler* s = tls_scheduler_of( uintptr_t(arg) ); // arg is equivalent to theTLS.get() if( s && s->my_auto_initialized ) { if( !--(s->my_ref_count) ) { - __TBB_ASSERT( !BlockingTSI, "Blocking auto-terminate is not supported." ); // If the TLS slot is already cleared by OS or underlying concurrency // runtime, restore its value. if( !is_set(s) ) assume_scheduler(s); - s->cleanup_master(); + s->cleanup_master( /*needs_wait_workers=*/false ); __TBB_ASSERT( is_set(NULL), "cleanup_master has not cleared its TLS slot" ); } } diff --git a/src/tbb/governor.h b/src/tbb/governor.h index cc10e89693..e45805f07f 100644 --- a/src/tbb/governor.h +++ b/src/tbb/governor.h @@ -151,8 +151,6 @@ class governor { static void initialize_rml_factory (); - static bool needsWaitWorkers () { return BlockingTSI!=NULL; } - static bool does_client_join_workers (const tbb::internal::rml::tbb_client &client); //! 
Must be called before init_scheduler diff --git a/src/tbb/market.cpp b/src/tbb/market.cpp index 8d388fdeff..fe24fd7b23 100644 --- a/src/tbb/market.cpp +++ b/src/tbb/market.cpp @@ -226,12 +226,13 @@ bool governor::does_client_join_workers (const tbb::internal::rml::tbb_client &c return ((const market&)client).must_join_workers(); } -arena* market::create_arena ( int num_slots, size_t stack_size, bool default_concurrency_requested ) { +arena* market::create_arena ( int num_slots, int num_reserved_slots, size_t stack_size, bool default_concurrency_requested ) { __TBB_ASSERT( num_slots > 0, NULL ); - market &m = global_market( num_slots-1, stack_size, default_concurrency_requested, + __TBB_ASSERT( num_reserved_slots <= num_slots, NULL ); + market &m = global_market( num_slots-num_reserved_slots, stack_size, default_concurrency_requested, /*is_public*/ true ); // increases market's public ref count - arena& a = arena::allocate_arena( m, min(num_slots, (int)m.my_num_workers_hard_limit) ); + arena& a = arena::allocate_arena( m, num_slots, num_reserved_slots ); // Add newly created arena into the existing market's list. arenas_list_mutex_type::scoped_lock lock(m.my_arenas_list_mutex); m.insert_arena_into_list(a); @@ -377,7 +378,7 @@ void market::update_allotment ( intptr_t highest_affected_priority ) { pl.workers_available = 0; arena_list_type::iterator it = pl.arenas.begin(); for ( ; it != pl.arenas.end(); ++it ) { - __TBB_ASSERT( it->my_num_workers_requested || !it->my_num_workers_allotted, NULL ); + __TBB_ASSERT( it->my_num_workers_requested >= 0 || !it->my_num_workers_allotted, NULL ); it->my_num_workers_allotted = 0; } } @@ -410,7 +411,6 @@ void market::adjust_demand ( arena& a, int delta ) { priority_level_info &pl = my_priority_levels[p]; pl.workers_requested += delta; __TBB_ASSERT( pl.workers_requested >= 0, NULL ); - __TBB_ASSERT( a.my_num_workers_requested >= 0, NULL ); if ( a.my_num_workers_requested <= 0 ) { if ( a.my_top_priority != normalized_normal_priority ) { GATHER_STATISTIC( ++governor::local_scheduler_if_initialized()->my_counters.arena_prio_resets ); @@ -431,6 +431,8 @@ void market::adjust_demand ( arena& a, int delta ) { } else if ( p > my_global_top_priority ) { __TBB_ASSERT( pl.workers_requested > 0, NULL ); + // TODO: investigate if the following invariant is always valid + __TBB_ASSERT( a.my_num_workers_requested >= 0, NULL ); update_global_top_priority(p); a.my_num_workers_allotted = min( (int)my_num_workers_soft_limit, a.my_num_workers_requested ); my_priority_levels[p - 1].workers_available = my_num_workers_soft_limit - a.my_num_workers_allotted; @@ -600,6 +602,7 @@ bool market::lower_arena_priority ( arena& a, intptr_t new_priority, uintptr_t o } bool market::update_arena_priority ( arena& a, intptr_t new_priority ) { + // TODO: do not acquire this global lock while checking arena's state. arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); __TBB_ASSERT( my_global_top_priority >= a.my_top_priority || a.my_num_workers_requested <= 0, NULL ); diff --git a/src/tbb/market.h b/src/tbb/market.h index 848bfcf0d0..887224ba66 100644 --- a/src/tbb/market.h +++ b/src/tbb/market.h @@ -256,7 +256,7 @@ class market : no_copy, rml::tbb_client { //! Creates an arena object /** If necessary, also creates global market instance, and boosts its ref count. Each call to create_arena() must be matched by the call to arena::free_arena(). 
**/ - static arena* create_arena ( int num_slots, size_t stack_size, bool default_concurrency_requested ); + static arena* create_arena ( int num_slots, int num_reserved_slots, size_t stack_size, bool default_concurrency_requested ); //! Removes the arena from the market's list void try_destroy_arena ( arena*, uintptr_t aba_epoch ); diff --git a/src/tbb/scheduler.cpp b/src/tbb/scheduler.cpp index 4517115369..c8ea9e19a1 100644 --- a/src/tbb/scheduler.cpp +++ b/src/tbb/scheduler.cpp @@ -1021,6 +1021,7 @@ generic_scheduler* generic_scheduler::create_worker( market& m, size_t index ) { __TBB_ASSERT(index, "workers should have index > 0"); s->my_arena_index = index; // index is not a real slot in arena yet s->my_dummy_task->prefix().ref_count = 2; + s->my_is_worker = true; governor::sign_on(s); return s; } @@ -1044,13 +1045,14 @@ generic_scheduler* generic_scheduler::create_master( arena* a ) { s->my_market->my_masters.push_front( *s ); lock.release(); #endif /* __TBB_TASK_GROUP_CONTEXT */ + s->my_is_worker = false; if( a ) { // Master thread always occupies the first slot s->attach_arena( a, /*index*/0, /*is_master*/true ); s->my_arena_slot->my_scheduler = s; a->my_default_ctx = s->default_context(); // also transfers implied ownership } - __TBB_ASSERT( !s->is_worker(), "Master thread must occupy the first slot in its arena" ); + __TBB_ASSERT( s->my_arena_index == 0, "Master thread must occupy the first slot in its arena" ); governor::sign_on(s); #if _WIN32||_WIN64 @@ -1074,7 +1076,7 @@ void generic_scheduler::cleanup_worker( void* arg, bool worker ) { s.free_scheduler(); } -void generic_scheduler::cleanup_master() { +void generic_scheduler::cleanup_master( bool needs_wait_workers ) { arena* const a = my_arena; market * const m = my_market; __TBB_ASSERT( my_market, NULL ); @@ -1122,9 +1124,7 @@ void generic_scheduler::cleanup_master() { my_arena_slot = NULL; // detached from slot free_scheduler(); // TODO: read global settings for the parameter at that point - // if workers are not joining, market can be released from on_thread_leaving(), - // so keep copy the state on local stack - bool must_join = m->join_workers = governor::needsWaitWorkers(); + m->join_workers = needs_wait_workers; if( a ) { #if __TBB_STATISTICS_EARLY_DUMP // Resetting arena to EMPTY state (as earlier TBB versions did) should not be @@ -1137,7 +1137,7 @@ void generic_scheduler::cleanup_master() { #endif a->on_thread_leaving(); } - if( must_join ) + if( needs_wait_workers ) m->wait_workers(); m->release( /*is_public*/ a != NULL ); // TODO: ideally, it should always be true } diff --git a/src/tbb/scheduler.h b/src/tbb/scheduler.h index d3a301ca37..16685af17b 100644 --- a/src/tbb/scheduler.h +++ b/src/tbb/scheduler.h @@ -91,6 +91,7 @@ struct scheduler_state { //! Pointer to market's (for workers) or current arena's (for the master) reload epoch counter. volatile uintptr_t *my_ref_reload_epoch; #endif /* __TBB_TASK_PRIORITY */ + bool my_is_worker; }; //! Work stealing task scheduler. @@ -255,7 +256,7 @@ class generic_scheduler: public scheduler static generic_scheduler* create_master( arena* a ); //! Perform necessary cleanup when a master thread stops using TBB. - void cleanup_master(); + void cleanup_master( bool needs_wait_workers ); //! Initialize a scheduler for a worker thread. 
static generic_scheduler* create_worker( market& m, size_t index ); @@ -278,7 +279,7 @@ class generic_scheduler: public scheduler void attach_arena( arena*, size_t index, bool is_master ); #if __TBB_TASK_ARENA - void nested_arena_entry( arena*, nested_arena_context &, bool as_worker ); + void nested_arena_entry( arena*, size_t, nested_arena_context &, bool as_worker ); void nested_arena_exit( nested_arena_context & ); void wait_until_empty(); #endif @@ -540,7 +541,7 @@ inline void generic_scheduler::attach_mailbox( affinity_id id ) { } inline bool generic_scheduler::is_worker() { - return my_arena_index != 0; //TODO: rework for multiple master + return my_is_worker; } inline unsigned generic_scheduler::number_of_workers_in_my_arena() { diff --git a/src/tbb/task_group_context.cpp b/src/tbb/task_group_context.cpp index 9de7f1d68b..83a009c7c9 100644 --- a/src/tbb/task_group_context.cpp +++ b/src/tbb/task_group_context.cpp @@ -467,15 +467,18 @@ void task_group_context::set_priority ( priority_t prio ) { internal::generic_scheduler* s = governor::local_scheduler_if_initialized(); if ( !s || !s->my_arena || !s->my_market->propagate_task_group_state(&task_group_context::my_priority, *this, p) ) return; - // Updating arena priority here does not eliminate necessity of checking each - // task priority and updating arena priority if necessary before the task execution. - // These checks will be necessary because: - // a) set_priority() may be invoked before any tasks from this task group are spawned; - // b) all spawned tasks from this task group are retrieved from the task pools. - // These cases create a time window when arena priority may be lowered. - s->my_market->update_arena_priority( *s->my_arena, p ); + //! TODO: the arena of the calling thread might be unrelated; // need to find out the right arena for priority update. + // The executing status check only guarantees being inside some working arena. + if ( s->my_innermost_running_task->state() == task::executing ) + // Updating arena priority here does not eliminate necessity of checking each + // task priority and updating arena priority if necessary before the task execution. + // These checks will be necessary because: + // a) set_priority() may be invoked before any tasks from this task group are spawned; + // b) all spawned tasks from this task group are retrieved from the task pools. + // These cases create a time window when arena priority may be lowered. 
+ s->my_market->update_arena_priority( *s->my_arena, p ); } priority_t task_group_context::priority () const { diff --git a/src/tbb/tbb_misc.cpp b/src/tbb/tbb_misc.cpp index 2064269a58..f4f58d36cd 100644 --- a/src/tbb/tbb_misc.cpp +++ b/src/tbb/tbb_misc.cpp @@ -196,12 +196,12 @@ bool cpu_has_speculation() { #if __TBB_TSX_AVAILABLE #if (__INTEL_COMPILER || __GNUC__ || _MSC_VER || __SUNPRO_CC) bool result = false; - const int hle_ebx_mask = 1<<4; + const int rtm_ebx_mask = 1<<11; #if _MSC_VER int info[4] = {0,0,0,0}; const int reg_ebx = 1; __cpuidex(info, 7, 0); - result = (info[reg_ebx] & hle_ebx_mask)!=0; + result = (info[reg_ebx] & rtm_ebx_mask)!=0; #elif __GNUC__ || __SUNPRO_CC int32_t reg_ebx = 0; int32_t reg_eax = 7; @@ -216,7 +216,7 @@ bool cpu_has_speculation() { #endif "edx" ); - result = (reg_ebx & hle_ebx_mask)!=0 ; + result = (reg_ebx & rtm_ebx_mask)!=0 ; #endif return result; #else diff --git a/src/tbbmalloc/backend.cpp b/src/tbbmalloc/backend.cpp index 4bbf2cdf75..86f10149a6 100644 --- a/src/tbbmalloc/backend.cpp +++ b/src/tbbmalloc/backend.cpp @@ -97,7 +97,37 @@ void HugePagesStatus::doPrintStatus(bool state, const char *stateName) fputs("\n", stderr); } -void *Backend::allocRawMem(size_t &size) const +#if CHECK_ALLOCATION_RANGE + +void Backend::UsedAddressRange::registerAlloc(uintptr_t left, uintptr_t right) +{ + MallocMutex::scoped_lock lock(mutex); + if (left < leftBound) + leftBound = left; + if (right > rightBound) + rightBound = right; + MALLOC_ASSERT(leftBound, ASSERT_TEXT); + MALLOC_ASSERT(leftBound < rightBound, ASSERT_TEXT); + MALLOC_ASSERT(leftBound <= left && right <= rightBound, ASSERT_TEXT); +} + +void Backend::UsedAddressRange::registerFree(uintptr_t left, uintptr_t right) +{ + MallocMutex::scoped_lock lock(mutex); + if (leftBound == left) { + if (rightBound == right) { + leftBound = ADDRESS_UPPER_BOUND; + rightBound = 0; + } else + leftBound = right; + } else if (rightBound == right) + rightBound = left; + MALLOC_ASSERT((!rightBound && leftBound == ADDRESS_UPPER_BOUND) + || leftBound < rightBound, ASSERT_TEXT); +} +#endif // CHECK_ALLOCATION_RANGE + +void *Backend::allocRawMem(size_t &size) { void *res = NULL; size_t allocSize; @@ -128,13 +158,15 @@ void *Backend::allocRawMem(size_t &size) const if ( res ) { size = allocSize; + if (!extMemPool->userPool()) + usedAddrRange.registerAlloc((uintptr_t)res, (uintptr_t)res+size); AtomicAdd((intptr_t&)totalMemSize, size); } return res; } -bool Backend::freeRawMem(void *object, size_t size) const +bool Backend::freeRawMem(void *object, size_t size) { bool fail; AtomicAdd((intptr_t&)totalMemSize, -size); @@ -142,6 +174,7 @@ bool Backend::freeRawMem(void *object, size_t size) const MALLOC_ASSERT(!extMemPool->fixedPool, "No free for fixed-size pools."); fail = (*extMemPool->rawFree)(extMemPool->poolId, object, size); } else { + usedAddrRange.registerFree((uintptr_t)object, (uintptr_t)object + size); hugePages.registerReleasing(object, size); fail = freeRawMemory(object, size); } @@ -980,6 +1013,7 @@ void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment MemRegion *oldRegion = static_cast(right)->memRegion; MALLOC_ASSERT( oldRegion < ptr, ASSERT_TEXT ); + const size_t oldRegionSize = oldRegion->allocSz; if (oldRegion->type != MEMREG_ONE_BLOCK) return NULL; // we are not single in the region const size_t userOffset = (uintptr_t)ptr - (uintptr_t)oldRegion; @@ -1024,6 +1058,9 @@ void *Backend::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment header->memoryBlock = lmb; 
MALLOC_ASSERT((uintptr_t)lmb + lmb->unalignedSize >= (uintptr_t)object + lmb->objectSize, "An object must fit to the block."); + + usedAddrRange.registerFree((uintptr_t)oldRegion, (uintptr_t)oldRegion + oldRegionSize); + usedAddrRange.registerAlloc((uintptr_t)region, (uintptr_t)region + requestSize); return object; } #endif /* BACKEND_HAS_MREMAP */ @@ -1363,6 +1400,7 @@ FreeBlock *Backend::addNewRegion(size_t size, MemRegionType memRegType, bool add void Backend::init(ExtMemoryPool *extMemoryPool) { extMemPool = extMemoryPool; + usedAddrRange.init(); coalescQ.init(&bkndSync); bkndSync.init(this); } diff --git a/src/tbbmalloc/frontend.cpp b/src/tbbmalloc/frontend.cpp index bc06d658cf..09317abc82 100644 --- a/src/tbbmalloc/frontend.cpp +++ b/src/tbbmalloc/frontend.cpp @@ -2444,7 +2444,8 @@ static inline bool isSmallObject (void *ptr) /**** Check if an object was allocated by scalable_malloc ****/ static inline bool isRecognized (void* ptr) { - return isLargeObject(ptr) || isSmallObject(ptr); + return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) && + (isLargeObject(ptr) || isSmallObject(ptr)); } static inline void freeSmallObject(MemoryPool *memPool, void *object) @@ -2853,15 +2854,21 @@ extern "C" void __TBB_malloc_safer_free(void *object, void (*original_free)(void if (!object) return; - // must check 1st for large object, because small object check touches 4 pages on left, - // and it can be inaccessible - if (isLargeObject(object)) { - TLSData *tls = defaultMemPool->getTLS(/*create=*/false); - - defaultMemPool->putToLLOCache(tls, object); - } else if (isSmallObject(object)) { - freeSmallObject(defaultMemPool, object); - } else if (original_free) + // tbbmalloc can allocate object only when tbbmalloc has been initialized + if (FencedLoad(mallocInitialized) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) { + if (isLargeObject(object)) { + // must check 1st for large object, because small object check touches 4 pages on left, + // and it can be inaccessible + TLSData *tls = defaultMemPool->getTLS(/*create=*/false); + + defaultMemPool->putToLLOCache(tls, object); + return; + } else if (isSmallObject(object)) { + freeSmallObject(defaultMemPool, object); + return; + } + } + if (original_free) original_free(object); } @@ -2903,7 +2910,7 @@ extern "C" void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original if (!ptr) { tmp = internalMalloc(sz); - } else if (isRecognized(ptr)) { + } else if (FencedLoad(mallocInitialized) && isRecognized(ptr)) { if (!sz) { internalFree(ptr); return NULL; @@ -3029,7 +3036,7 @@ extern "C" void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, siz if (!ptr) { tmp = allocateAligned(defaultMemPool, size, alignment); - } else if (isRecognized(ptr)) { + } else if (FencedLoad(mallocInitialized) && isRecognized(ptr)) { if (!size) { internalFree(ptr); return NULL; @@ -3094,7 +3101,7 @@ extern "C" size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize { if (object) { // Check if the memory was allocated by scalable_malloc - if (isRecognized(object)) + if (FencedLoad(mallocInitialized) && isRecognized(object)) return internalMsize(object); else if (original_msize) return original_msize(object); @@ -3113,7 +3120,7 @@ extern "C" size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignmen { if (object) { // Check if the memory was allocated by scalable_malloc - if (isRecognized(object)) + if (FencedLoad(mallocInitialized) && isRecognized(object)) return internalMsize(object); else if (orig_aligned_msize) 
return orig_aligned_msize(object,alignment,offset); diff --git a/src/tbbmalloc/proxy.cpp b/src/tbbmalloc/proxy.cpp index 13b4569f28..438b3f0e27 100644 --- a/src/tbbmalloc/proxy.cpp +++ b/src/tbbmalloc/proxy.cpp @@ -331,38 +331,42 @@ void* __TBB_malloc_safer__aligned_realloc_##CRTLIB( void *ptr, size_t size, size return __TBB_malloc_safer_aligned_realloc( ptr, size, aligment, &func_ptrs ); \ } -// limit is 30 bytes/60 symbols per line +// limit is 30 bytes/60 symbols per line, * can be used to match any digit in bytecodes const char* known_bytecodes[] = { #if _WIN64 + #if __TBB_OVERLOAD_OLD_MSVCR "4883EC284885C974", //release free() win64 - "4883EC384885C975", //release msize() win64 "4885C974375348", //release free() 8.0.50727.42 win64 - "48894C24084883EC28BA", //debug prologue for win64 "4C8BC1488B0DA6E4040033", //win64 SDK - "4883EC284885C975", //release msize() 10.0.21003.1 win64 "48895C2408574883EC20", //release _aligned_msize() win64 + #endif + "4883EC384885C975", //release msize() 9.0 win64 + "48894C24084883EC28BA", //debug prologue for win64 + "4883EC284885C975", //release msize() 10.0.21003.1 win64 "4C894424184889542410", //debug _aligned_msize() win64 -#else - "558BEC6A018B", //debug free() & _msize() 8.0.50727.4053 win32 +#else // _WIN64 + #if __TBB_OVERLOAD_OLD_MSVCR "6A1868********E8", //release free() 8.0.50727.4053 win32 - "6A1C68********E8", //release _msize() 8.0.50727.4053 win32 - "558BEC837D08000F", //release _msize() 11.0.51106.1 win32 - "8BFF558BEC6A", //debug free() & _msize() 9.0.21022.8 win32 - "8BFF558BEC83", //debug free() & _msize() 10.0.21003.1 win32 "8BFF558BEC8B4508", //release _aligned_msize() 10.0 win32 + #endif + "6A1C68********E8", //release _msize() 8.0.50727.4053, 9.0 win32 + "558BEC6A018B", //debug free() & _msize() 11.0 win32 + "558BEC837D08000F", //release _msize() 11.0.51106.1 win32 + "8BFF558BEC6A", //debug free() & _msize() 10.0.40219.325 win32 + "8BFF558BEC83", //release free() & _msize() 10.0.40219.325 win32 "8BFF558BEC8B4510", //debug _aligned_msize() 10.0 win32 "558BEC8B451050", //debug _aligned_msize() 11.0 win32 -#endif +#endif // _WIN64 NULL }; -#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY(CRT_VER,function_name,dbg_modifier) \ +#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY(CRT_VER,function_name,dbg_modifier) \ ReplaceFunctionWithStore( #CRT_VER #dbg_modifier ".dll", #function_name, (FUNCPTR)__TBB_malloc_safer_##function_name##_##CRT_VER##dbg_modifier, known_bytecodes, (FUNCPTR*)&orig_##function_name##_##CRT_VER##dbg_modifier ); -#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_NO_FALLBACK(CRT_VER,function_name,dbg_modifier) \ +#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_NO_FALLBACK(CRT_VER,function_name,dbg_modifier) \ ReplaceFunctionWithStore( #CRT_VER #dbg_modifier ".dll", #function_name, (FUNCPTR)__TBB_malloc_safer_##function_name##_##CRT_VER##dbg_modifier, 0, NULL ); -#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_REDIRECT(CRT_VER,function_name,dest_func,dbg_modifier) \ +#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_REDIRECT(CRT_VER,function_name,dest_func,dbg_modifier) \ ReplaceFunctionWithStore( #CRT_VER #dbg_modifier ".dll", #function_name, (FUNCPTR)__TBB_malloc_safer_##dest_func##_##CRT_VER##dbg_modifier, 0, NULL ); @@ -381,12 +385,14 @@ const char* known_bytecodes[] = { __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_RELEASE(CRT_VER) \ __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_DEBUG(CRT_VER) +#if __TBB_OVERLOAD_OLD_MSVCR __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr70d); 
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr70); __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr71d); __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr71); __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr80d); __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr80); +#endif __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr90d); __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr90); __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr100d); @@ -441,8 +447,6 @@ void operator_delete_arr_t(void* ptr, const std::nothrow_t&) throw() { } const char* modules_to_replace[] = { - "msvcr80d.dll", - "msvcr80.dll", "msvcr90d.dll", "msvcr90.dll", "msvcr100d.dll", @@ -451,11 +455,16 @@ const char* modules_to_replace[] = { "msvcr110.dll", "msvcr120d.dll", "msvcr120.dll", -// "ucrtbase.dll", + "ucrtbase.dll", +// "ucrtbased.dll" is not supported because of problems with _dbg functions +#if __TBB_OVERLOAD_OLD_MSVCR + "msvcr80d.dll", + "msvcr80.dll", "msvcr70d.dll", "msvcr70.dll", "msvcr71d.dll", "msvcr71.dll", +#endif #if __TBB_TODO // TODO: Try enabling replacement for non-versioned system binaries below "msvcrtd.dll", @@ -526,27 +535,29 @@ typedef wchar_t unicode_char_t; void ReplaceFunctionWithStore( const unicode_char_t *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc, FRR_ON_ERROR on_error = FRR_FAIL ) { - FRR_TYPE type = ReplaceFunction( dllName, funcName, newFunc, opcodes, origFunc ); - if (type == FRR_NODLL) return; - if (type != FRR_OK && on_error == FRR_FAIL) - { - fprintf(stderr, "Failed to replace function %s in module %s\n", - funcName, dllName); - exit(1); - } + FRR_TYPE res = ReplaceFunction( dllName, funcName, newFunc, opcodes, origFunc ); + + if (res == FRR_OK || res == FRR_NODLL || (res == FRR_NOFUNC && on_error == FRR_IGNORE)) + return; + + fprintf(stderr, "Failed to %s function %s in module %s\n", + res==FRR_NOFUNC? "find" : "replace", funcName, dllName); + exit(1); } void doMallocReplacement() { // Replace functions and keep backup of original code (separate for each runtime) +#if __TBB_OVERLOAD_OLD_MSVCR __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr70) __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr71) __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr80) +#endif __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr90) __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr100) __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr110) __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr120) -// __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_RELEASE(ucrtbase) + __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_RELEASE(ucrtbase) // Replace functions without storing original code for (size_t j = 0; j < arrayLength(modules_to_replace); j++) { diff --git a/src/tbbmalloc/tbbmalloc_internal.h b/src/tbbmalloc/tbbmalloc_internal.h index 255f00e5a9..14b5b0585c 100644 --- a/src/tbbmalloc/tbbmalloc_internal.h +++ b/src/tbbmalloc/tbbmalloc_internal.h @@ -37,6 +37,7 @@ // TODO: *BSD also has it #define BACKEND_HAS_MREMAP __linux__ +#define CHECK_ALLOCATION_RANGE MALLOC_DEBUG || MALLOC_ZONE_OVERLOAD_ENABLED || MALLOC_UNIXLIKE_OVERLOAD_ENABLED #include "tbb/tbb_config.h" // for __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN #if __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN @@ -731,30 +732,64 @@ class Backend { inline bool operator()(size_t oldMaxReq, size_t requestSize) const; }; - ExtMemoryPool *extMemPool; +#if CHECK_ALLOCATION_RANGE + // Keep min and max of all addresses requested from OS, + // use it for checking memory possibly allocated by replaced allocators + // and for debugging purposes. Valid only for default memory pool. 
+ class UsedAddressRange { + static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX; + + uintptr_t leftBound, + rightBound; + MallocMutex mutex; + public: + // rightBound is zero-initialized + void init() { leftBound = ADDRESS_UPPER_BOUND; } + void registerAlloc(uintptr_t left, uintptr_t right); + void registerFree(uintptr_t left, uintptr_t right); + // as only left and right bounds are kept, we can return true + // for pointer not allocated by us, if more than single region + // was requested from OS + bool inRange(void *ptr) const { + const uintptr_t p = (uintptr_t)ptr; + return leftBound<=p && p<=rightBound; + } + }; +#else + class UsedAddressRange { + public: + void init() { } + void registerAlloc(uintptr_t, uintptr_t) {} + void registerFree(uintptr_t, uintptr_t) {} + bool inRange(void *) const { return true; } + }; +#endif + + ExtMemoryPool *extMemPool; // used for release every region on pool destroying - MemRegionList regionList; + MemRegionList regionList; - CoalRequestQ coalescQ; // queue of coalescing requests - BackendSync bkndSync; + CoalRequestQ coalescQ; // queue of coalescing requests + BackendSync bkndSync; // semaphore protecting adding more more memory from OS MemExtendingSema memExtendingSema; - size_t totalMemSize, - memSoftLimit; + size_t totalMemSize, + memSoftLimit; + UsedAddressRange usedAddrRange; // to keep 1st allocation large than requested, keep bootstrapping status enum { bootsrapMemNotDone = 0, bootsrapMemInitializing, bootsrapMemDone }; - intptr_t bootsrapMemStatus; - MallocMutex bootsrapMemStatusMutex; + intptr_t bootsrapMemStatus; + MallocMutex bootsrapMemStatusMutex; // Using of maximal observed requested size allows decrease // memory consumption for small requests and decrease fragmentation // for workloads when small and large allocation requests are mixed. // TODO: decrease, not only increase it - size_t maxRequestedSize; + size_t maxRequestedSize; FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin); FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize); @@ -780,8 +815,8 @@ class Backend { void removeBlockFromBin(FreeBlock *fBlock); - void *allocRawMem(size_t &size) const; - bool freeRawMem(void *object, size_t size) const; + void *allocRawMem(size_t &size); + bool freeRawMem(void *object, size_t size); void putLargeBlock(LargeMemoryBlock *lmb); void releaseCachesToLimit(); @@ -820,6 +855,8 @@ class Backend { } inline size_t getMaxBinnedSize() const; + bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); } + #if __TBB_MALLOC_WHITEBOX_TEST size_t getTotalMemSize() const { return totalMemSize; } #endif diff --git a/src/test/harness.h b/src/test/harness.h index fd68be4ec9..d738498dbe 100644 --- a/src/test/harness.h +++ b/src/test/harness.h @@ -131,7 +131,7 @@ void print_call_stack() { #elif __SUNPRO_CC REPORT("Call stack info:\n"); printstack(fileno(stdout)); - #elif _WIN32_WINNT > 0x0501 && _MSC_VER && !__TBB_WIN8UI_SUPPORT + #elif _WIN32_WINNT > 0x0501 && _MSC_VER>=1500 && !__TBB_WIN8UI_SUPPORT const int sz = 62; // XP limitation for number of frames void *buff[sz]; int n = CaptureStackBackTrace(0, sz, buff, NULL); diff --git a/src/test/harness_allocator_overload.h b/src/test/harness_allocator_overload.h new file mode 100644 index 0000000000..28222f6033 --- /dev/null +++ b/src/test/harness_allocator_overload.h @@ -0,0 +1,39 @@ +/* + Copyright 2005-2015 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. 
Threading Building Blocks is free software; + you can redistribute it and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. Threading Building Blocks is + distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. You should have received a copy of + the GNU General Public License along with Threading Building Blocks; if not, write to the + Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software library without + restriction. Specifically, if other files instantiate templates or use macros or inline + functions from this file, or you compile this file and link it with other files to produce + an executable, this file does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General Public License. +*/ + +#ifndef tbb_test_harness_allocator_overload_H +#define tbb_test_harness_allocator_overload_H + +#include "../tbbmalloc/proxy.h" // for MALLOC_UNIXLIKE_OVERLOAD_ENABLED, MALLOC_ZONE_OVERLOAD_ENABLED +#include "tbb/tbb_config.h" // for __TBB_WIN8UI_SUPPORT + +// Skip configurations with unsupported system malloc overload: +// skip unsupported MSVCs, WIN8UI and MINGW (it doesn't define _MSC_VER), +// no support for MSVC 2015 in debug for now, +// don't use defined(_MSC_VER), because result of using defined() in macro expansion is undefined +#define MALLOC_WINDOWS_OVERLOAD_ENABLED ((_WIN32||_WIN64) && !__TBB_WIN8UI_SUPPORT && _MSC_VER >= 1500 && !(_MSC_VER == 1900 && _DEBUG)) + +// Skip configurations with unsupported system malloc overload: +// * overload via linking with -lmalloc_proxy is broken in offload, +// as the library is loaded too late in that mode, +// * LD_PRELOAD mechanism is broken in offload +#define HARNESS_SKIP_TEST ((!MALLOC_WINDOWS_OVERLOAD_ENABLED && !MALLOC_UNIXLIKE_OVERLOAD_ENABLED && !MALLOC_ZONE_OVERLOAD_ENABLED) || __TBB_MIC_OFFLOAD) + +#endif // tbb_test_harness_allocator_overload_H diff --git a/src/test/harness_defs.h b/src/test/harness_defs.h index a4bde1f3ed..78581108ef 100644 --- a/src/test/harness_defs.h +++ b/src/test/harness_defs.h @@ -123,7 +123,7 @@ #define __TBB_THROW_FROM_DTOR_BROKEN (__clang__ && (__apple_build_version__ && __apple_build_version__ < 5000279 || __TBB_CLANG_VERSION && __TBB_CLANG_VERSION < 50000)) //std::uncaught_exception is broken on some version of stdlibc++ (it returns true with no active exception) -#define __TBB_STD_UNCAUGHT_EXCEPTION_BROKEN (__linux__ && (__TBB_GCC_VERSION == 40407 || __TBB_GCC_VERSION == 40902)) +#define __TBB_STD_UNCAUGHT_EXCEPTION_BROKEN (__linux__ && (__TBB_GCC_VERSION == 40407 || __TBB_GCC_VERSION == 40802 || __TBB_GCC_VERSION == 40902)) #if __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN #define _EXCEPTION_PTR_H /* prevents exception_ptr.h inclusion */ diff --git a/src/test/harness_iterator.h b/src/test/harness_iterator.h index a16dd6ab3b..a43a37b55f 100644 --- a/src/test/harness_iterator.h +++ b/src/test/harness_iterator.h @@ -91,8 +91,10 @@ class RandomIterator { T& operator* () { return *my_ptr; } RandomIterator& operator++ () { ++my_ptr; return *this; } bool operator== ( const RandomIterator& r ) { return my_ptr == r.my_ptr; 
} - difference_type operator- (const RandomIterator &r) {return my_ptr - r.my_ptr;} + bool operator!= ( const RandomIterator& r ) { return my_ptr != r.my_ptr; } + difference_type operator- (const RandomIterator &r) const {return my_ptr - r.my_ptr;} RandomIterator operator+ (difference_type n) {return RandomIterator(my_ptr + n);} + bool operator< (const RandomIterator &r) const {return my_ptr < r.my_ptr;} }; template <class T> @@ -116,8 +118,10 @@ class ConstRandomIterator { const T& operator* () { return *my_ptr; } ConstRandomIterator& operator++ () { ++my_ptr; return *this; } bool operator== ( const ConstRandomIterator& r ) { return my_ptr == r.my_ptr; } - difference_type operator- (const ConstRandomIterator &r) {return my_ptr - r.my_ptr;} + bool operator!= ( const ConstRandomIterator& r ) { return my_ptr != r.my_ptr; } + difference_type operator- (const ConstRandomIterator &r) const {return my_ptr - r.my_ptr;} ConstRandomIterator operator+ (difference_type n) {return ConstRandomIterator(my_ptr + n);} + bool operator< (const ConstRandomIterator &r) const {return my_ptr < r.my_ptr;} }; } // namespace Harness diff --git a/src/test/harness_tsx.h b/src/test/harness_tsx.h index fba4b78f87..4dc084dedb 100644 --- a/src/test/harness_tsx.h +++ b/src/test/harness_tsx.h @@ -44,8 +44,8 @@ bool have_TSX() { const int reg_ebx = 1; int old_ecx = 0; __cpuidex(info, 7, old_ecx); - result = (info[reg_ebx] & hle_ebx_mask)!=0; - if( result ) ASSERT( (info[reg_ebx] & rtm_ebx_mask)!=0, NULL ); + result = (info[reg_ebx] & rtm_ebx_mask)!=0; + if( result ) ASSERT( (info[reg_ebx] & hle_ebx_mask)!=0, NULL ); #elif __GNUC__ || __SUNPRO_CC int32_t reg_ebx = 0; int32_t reg_eax = 7; @@ -60,8 +60,8 @@ bool have_TSX() { #endif "edx" ); - result = (reg_ebx & hle_ebx_mask)!=0 ; - if( result ) ASSERT( (reg_ebx & rtm_ebx_mask)!=0, NULL ); + result = (reg_ebx & rtm_ebx_mask)!=0 ; + if( result ) ASSERT( (reg_ebx & hle_ebx_mask)!=0, NULL ); #endif return result; } diff --git a/src/test/test_allocator.h b/src/test/test_allocator.h index 61bf2fe1f0..4a5d3ecb06 100644 --- a/src/test/test_allocator.h +++ b/src/test/test_allocator.h @@ -59,6 +59,33 @@ inline char PseudoRandomValue( size_t j, size_t k ) { return char(j*3 ^ j>>4 ^ k); } +#if __APPLE__ +#include <fcntl.h> +#include <unistd.h> + +// A RAII class to disable stderr in a certain scope. It's not thread-safe. +class DisableStderr { + int stderrCopy; + static void dupToStderrAndClose(int fd) { + int ret = dup2(fd, STDERR_FILENO); // close current stderr + ASSERT(ret != -1, NULL); + ret = close(fd); + ASSERT(ret != -1, NULL); + } +public: + DisableStderr() { + int devNull = open("/dev/null", O_WRONLY); + ASSERT(devNull != -1, NULL); + stderrCopy = dup(STDERR_FILENO); + ASSERT(stderrCopy != -1, NULL); + dupToStderrAndClose(devNull); + } + ~DisableStderr() { + dupToStderrAndClose(stderrCopy); + } +}; +#endif + //! T is type and A is allocator for that type template<typename T, typename A> void TestBasic( A& a ) { @@ -138,6 +165,11 @@ void TestBasic( A& a ) { bool exception_caught = false; typename A::pointer p1 = NULL; try { +#if __APPLE__ + // On OS X*, failure to map memory results in messages to stderr; + // suppress them.
+ DisableStderr disableStderr; +#endif p1 = a.allocate(too_big); } catch ( std::bad_alloc ) { exception_caught = true; diff --git a/src/test/test_atomic.cpp b/src/test/test_atomic.cpp index cd454de930..75ef987cca 100644 --- a/src/test/test_atomic.cpp +++ b/src/test/test_atomic.cpp @@ -472,7 +472,7 @@ namespace TestConstExprInitializationOfGlobalObjectsHelper{ static_before(){ result = (static_atomic==ct_value); } \ } ; \ \ - typename tester::static_before tester::static_before_; \ + tester::static_before tester::static_before_; \ tbb::atomic tester::static_atomic(ct_value); \ \ auto_registered_tests_helper::registration tester::registered; \ diff --git a/src/test/test_malloc_atexit.cpp b/src/test/test_malloc_atexit.cpp index b2013c0eb0..b10a2c625a 100644 --- a/src/test/test_malloc_atexit.cpp +++ b/src/test/test_malloc_atexit.cpp @@ -27,12 +27,7 @@ */ #include -#include "../tbbmalloc/proxy.h" // __TBB_malloc_safer_msize -#include "tbb/tbb_config.h" // for __TBB_WIN8UI_SUPPORT - -#if !(_WIN32||_WIN64 || MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED) || __TBB_WIN8UI_SUPPORT || __MINGW32__ || __MINGW64__ -#define HARNESS_SKIP_TEST 1 -#endif +#include "harness_allocator_overload.h" // __TBB_malloc_safer_msize() returns 0 for unknown objects, // thus we can detect ownership @@ -156,9 +151,6 @@ int TestMain () { #ifdef _PGO_INSTRUMENT REPORT("Known issue: test_malloc_atexit hangs if compiled with -prof-genx\n"); return Harness::Skipped; -#elif __TBB_MIC_OFFLOAD - REPORT("Known issue: libmalloc_proxy.so is loaded too late in the offload mode on the target when linked via -lmalloc_proxy\n"); - return Harness::Skipped; #else ASSERT( dll_isMallocOverloaded(), "malloc was not replaced" ); ASSERT( exe_isMallocOverloaded(), "malloc was not replaced" ); diff --git a/src/test/test_malloc_overload.cpp b/src/test/test_malloc_overload.cpp index ff9137ebac..a758e7b0e8 100644 --- a/src/test/test_malloc_overload.cpp +++ b/src/test/test_malloc_overload.cpp @@ -36,22 +36,15 @@ #define _ISOC11_SOURCE 1 // to get C11 declarations for GLIBC #define HARNESS_NO_PARSE_COMMAND_LINE 1 -#include "tbb/tbb_config.h" // to get __TBB_WIN8UI_SUPPORT +#include "harness_allocator_overload.h" -#if __linux__ || __APPLE__ -#define MALLOC_REPLACEMENT_AVAILABLE 1 -#elif _WIN32 && !__MINGW32__ && !__MINGW64__ && !__TBB_WIN8UI_SUPPORT -#define MALLOC_REPLACEMENT_AVAILABLE 2 +#if MALLOC_WINDOWS_OVERLOAD_ENABLED #include "tbb/tbbmalloc_proxy.h" #endif -// LD_PRELOAD mechanism is broken in offload, no support for MSVC 2015 in debug for now -#if __TBB_MIC_OFFLOAD || !MALLOC_REPLACEMENT_AVAILABLE || (_MSC_VER >= 1900 && _DEBUG) -#define HARNESS_SKIP_TEST 1 -#endif #include "harness.h" -#if MALLOC_REPLACEMENT_AVAILABLE +#if !HARNESS_SKIP_TEST #if __ANDROID__ #include // for __ANDROID_API__ @@ -76,7 +69,7 @@ #endif #include #include -#if MALLOC_REPLACEMENT_AVAILABLE == 1 +#if MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED #include // for sysconf #include #endif @@ -199,9 +192,9 @@ static void scalableMallocCheckSize(void *object, size_t size) ASSERT(uintptr_t(lmb)objectSize >= size, NULL); } -#if MALLOC_REPLACEMENT_AVAILABLE == 1 +#if MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED ASSERT(malloc_usable_size(object) >= size, NULL); -#elif MALLOC_REPLACEMENT_AVAILABLE == 2 +#elif MALLOC_WINDOWS_OVERLOAD_ENABLED // Check that _msize works correctly ASSERT(_msize(object) >= size, NULL); ASSERT(size<8 || _aligned_msize(object,8,0) >= size, NULL); @@ -226,7 +219,7 @@ void 
CheckStdFuncOverload(void *(*malloc_p)(size_t), void *(*calloc_p)(size_t, s free_p(ptr1); } -#if MALLOC_REPLACEMENT_AVAILABLE == 1 +#if MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED void CheckMemalignFuncOverload(void *(*memalign_p)(size_t, size_t), void (*free_p)(void*)) @@ -259,7 +252,7 @@ void CheckPvalloc(void *(*pvalloc_p)(size_t), void (*free_p)(void*)) } } -#endif // MALLOC_REPLACEMENT_AVAILABLE +#endif // MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED // regression test: on OS X scalable_free() treated small aligned object, // placed in large block, as small block @@ -273,7 +266,7 @@ void CheckFreeAligned() { #if __TBB_POSIX_MEMALIGN_PRESENT int ret = posix_memalign(&ptr, align[a], sz[s]); ASSERT(!ret, NULL); -#elif MALLOC_REPLACEMENT_AVAILABLE == 2 +#elif MALLOC_WINDOWS_OVERLOAD_ENABLED ptr = _aligned_malloc(sz[s], align[a]); #endif ASSERT(is_aligned(ptr, align[a]), NULL); @@ -317,7 +310,7 @@ void TestZoneOverload() { int TestMain() { void *ptr, *ptr1; -#if MALLOC_REPLACEMENT_AVAILABLE == 1 +#if MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED ASSERT(dlsym(RTLD_DEFAULT, "scalable_malloc"), "Lost dependence on malloc_proxy or LD_PRELOAD was not set?"); #endif @@ -347,7 +340,7 @@ int TestMain() { free(newEnv); CheckStdFuncOverload(malloc, calloc, realloc, free); -#if MALLOC_REPLACEMENT_AVAILABLE == 1 +#if MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED #if __TBB_POSIX_MEMALIGN_PRESENT int ret = posix_memalign(&ptr, 1024, 3*minLargeObjectSize); @@ -385,7 +378,7 @@ int TestMain() { #endif #endif // __linux__ -#elif MALLOC_REPLACEMENT_AVAILABLE == 2 +#else // MALLOC_WINDOWS_OVERLOAD_ENABLED ptr = _aligned_malloc(minLargeObjectSize, 16); scalableMallocCheckSize(ptr, minLargeObjectSize); @@ -429,4 +422,4 @@ int TestMain() { return Harness::Done; } -#endif /* MALLOC_REPLACEMENT_AVAILABLE */ +#endif // !HARNESS_SKIP_TEST diff --git a/src/test/test_malloc_pools.cpp b/src/test/test_malloc_pools.cpp index 3b8fb06146..1dbfa97868 100644 --- a/src/test/test_malloc_pools.cpp +++ b/src/test/test_malloc_pools.cpp @@ -331,16 +331,18 @@ static void *fixedBufGetMem(intptr_t pool_id, size_t &bytes) return ((FixedPoolHeadBase*)pool_id)->useData(bytes); } -class FixedPoolRun: NoAssign { - Harness::SpinBarrier *startB; +class FixedPoolUse: NoAssign { + static Harness::SpinBarrier startB; rml::MemoryPool *pool; size_t reqSize; int iters; public: - FixedPoolRun(Harness::SpinBarrier *b, rml::MemoryPool *p, size_t sz, int it) : - startB(b), pool(p), reqSize(sz), iters(it) {} + FixedPoolUse(unsigned threads, rml::MemoryPool *p, size_t sz, int it) : + pool(p), reqSize(sz), iters(it) { + startB.initialize(threads); + } void operator()( int /*id*/ ) const { - startB->wait(); + startB.wait(); for (int i=0; i head; - - pool_create_v1((intptr_t)&head, &pol, &pool); - void *largeObj = pool_malloc(pool, MAX_OBJECT); - ASSERT(largeObj, NULL); - pool_free(pool, largeObj); - - largeObj = pool_malloc(pool, MAX_OBJECT); - ASSERT(largeObj, NULL); - pool_free(pool, largeObj); - - for (int i=0; i head; - largeObj = pool_malloc(pool, MAX_OBJECT); - ASSERT(largeObj, NULL); - pool_free(pool, largeObj); + pool_create_v1((intptr_t)&head, &pol, &pool); + { + NativeParallelFor( 1, FixedPoolUse(1, pool, MAX_OBJECT, 2) ); - // each thread asks for an MAX_OBJECT/p/2 object, - // /2 is to cover fragmentation - for (int p=MinThread; p<=MaxThread; p++) { - Harness::SpinBarrier startB(p); - NativeParallelFor( p, FixedPoolRun(&startB, pool, - MAX_OBJECT/p/2, 10000) ); - } 
- { - size_t maxSz; - int p = 512; - Harness::SpinBarrier barrier(p); + for (int i=0; i head; pool_create_v1((intptr_t)&head, &pol, &pool); int p=128; - Harness::SpinBarrier startB(p); - NativeParallelFor( p, FixedPoolRun(&startB, pool, MAX_OBJECT/p/2, 1) ); + NativeParallelFor( p, FixedPoolUse(p, pool, MAX_OBJECT/p/2, 1) ); bool ok = pool_destroy(pool); ASSERT(ok, NULL); } @@ -650,7 +649,7 @@ void TestPoolCreation() for (created=0; created0, NULL); - else ASSERT(tbb::task_arena::current_thread_index()==0, NULL); + ASSERT(tbb::task_arena::current_thread_index()<(myMaxConcurrency>1?myMaxConcurrency:2), NULL); + if(is_worker) ASSERT(tbb::task_arena::current_thread_index()>=myNumReservedSlots, NULL); } /*override*/ void on_scheduler_exit( bool is_worker ) { @@ -155,9 +157,12 @@ class ArenaObserver : public tbb::task_scheduler_observer { old_id.local() = 0; } public: - ArenaObserver(tbb::task_arena &a, int id) : tbb::task_scheduler_observer(a) { - ASSERT(id, NULL); - myId = id; + ArenaObserver(tbb::task_arena &a, int maxConcurrency, int numReservedSlots, int id) + : tbb::task_scheduler_observer(a) + , myId(id) + , myMaxConcurrency(maxConcurrency) + , myNumReservedSlots(numReservedSlots) { + ASSERT(myId, NULL); observe(true); } ~ArenaObserver () { @@ -182,9 +187,9 @@ void TestConcurrentArenas(int p) { //Harness::ConcurrencyTracker::Reset(); tbb::task_arena a1; a1.initialize(1,0); - ArenaObserver o1(a1, p*2+1); + ArenaObserver o1(a1, 1, 0, p*2+1); tbb::task_arena a2(2,1); - ArenaObserver o2(a2, p*2+2); + ArenaObserver o2(a2, 2, 1, p*2+2); Harness::SpinBarrier barrier(2); AsynchronousWork work(barrier); a1.enqueue(work); // put async work @@ -318,12 +323,30 @@ class MultipleMastersPart4 : NoAssign { } }; +class MultipleMastersPart5 : NoAssign { + tbb::task_arena &my_a; + Harness::SpinBarrier &my_b; + +public: + MultipleMastersPart5( tbb::task_arena &a, Harness::SpinBarrier &b) : my_a(a), my_b(b) {} + // NativeParallelFor's functor + void operator()(int) const { + local_id.local() = 1; + my_a.execute(*this); + } + // Arena's functor + void operator()() const { + ASSERT( local_id.local() == 1, "Unexpected thread." ); + my_b.timed_wait( 10 ); + } +}; + void TestMultipleMasters(int p) { { REMARK("multiple masters, part 1\n"); tbb::task_arena a(1,0); a.initialize(); - ArenaObserver o(a, 1); + ArenaObserver o(a, 1, 0, 1); Harness::SpinBarrier barrier1(p), barrier2(2*p+1); // each of p threads will submit two tasks signaling the barrier NativeParallelFor( p, MultipleMastersBody(a, barrier1, barrier2) ); barrier2.timed_wait(10); @@ -331,7 +354,7 @@ void TestMultipleMasters(int p) { } { REMARK("multiple masters, part 2\n"); tbb::task_arena a(2,1); - ArenaObserver o(a, 2); + ArenaObserver o(a, 2, 1, 2); Harness::SpinBarrier barrier(p+2); a.enqueue(AsynchronousWork(barrier, /*blocking=*/true)); // occupy the worker, a regression test for bug 1981 NativeParallelFor( p, MultipleMastersPart2(a, barrier) ); @@ -349,11 +372,18 @@ void TestMultipleMasters(int p) { int c = p%3? (p%2? p : 2) : 3; REMARK("multiple masters, part 4: contexts, arena(%d)\n", c); tbb::task_arena a(c, 1); - ArenaObserver o(a, c); + ArenaObserver o(a, c, 1, c); Harness::SpinBarrier barrier(c); MultipleMastersPart4 test(a, barrier); NativeParallelFor(p, test); a.debug_wait_until_empty(); + } { + // Check if multiple masters can achive maximum concurrency. 
+ REMARK("multiple masters, part 5: masters on barrier, arena(%d)\n", p); + tbb::task_arena a(p, 1); + Harness::SpinBarrier barrier(p); + MultipleMastersPart5 test(a, barrier); + NativeParallelFor(p, test); } } @@ -470,6 +500,115 @@ void TestArenaEntryConsistency() { body.test(i); } +class TestArenaMaxParallelismBody : NoAssign { + tbb::task_arena &my_a; + int my_p; + Harness::SpinBarrier *my_barrier; +public: + TestArenaMaxParallelismBody( tbb::task_arena &a, int p, Harness::SpinBarrier *b = NULL ) : my_a( a ), my_p( p ), my_barrier(b) {} + // NativeParallelFor's functor + void operator()( int ) const { + my_a.execute( *this ); + } + // Arena's functor + void operator()() const { + int idx = tbb::task_arena::current_thread_index(); + ASSERT( idx < (my_p > 1 ? my_p : 2), NULL ); + if ( my_barrier ) my_barrier->timed_wait( 10 ); + else Harness::Sleep( 10 ); + } +}; + +void TestArenaMaxParallelism( int p ) { + { + tbb::task_arena a( p, 0 ); + Harness::SpinBarrier b( p ); + TestArenaMaxParallelismBody test( a, p, &b ); + for ( int i = 1; i < p; ++i ) + a.enqueue( test ); + a.execute( test ); + a.debug_wait_until_empty(); + } + { + tbb::task_arena a( p, 1 ); + Harness::SpinBarrier b( p ); + TestArenaMaxParallelismBody test( a, p, &b ); + for ( int i = 1; i < p; ++i ) + a.enqueue( test ); + a.execute( test ); + a.debug_wait_until_empty(); + } + { + tbb::task_arena a( p, 0 ); + NativeParallelFor( 2*p, TestArenaMaxParallelismBody( a, p ) ); + a.debug_wait_until_empty(); + } + { + tbb::task_arena a( p, 1 ); + NativeParallelFor( 2*p, TestArenaMaxParallelismBody( a, p ) ); + a.debug_wait_until_empty(); + } +} + +class TestArenaReservedMasterSlotsBody : NoAssign { + tbb::task_arena &my_a; + Harness::SpinBarrier &my_barrier; + Harness::SpinBarrier &my_worker_barrier; + int my_max_concurrency; + int my_reserved_slots; +public: + TestArenaReservedMasterSlotsBody( tbb::task_arena &a, Harness::SpinBarrier &b, Harness::SpinBarrier &worker_b, int max_concurrency, int reserved_slots ) + : my_a( a ), my_barrier(b), my_worker_barrier(worker_b), my_max_concurrency(max_concurrency), my_reserved_slots(reserved_slots) {} + // NativeParallelFor's functor + void operator()( int ) const { + local_id.local() = 1; + my_a.execute( *this ); + } + // Arena's functor + void operator()() const { + int idx = tbb::task_arena::current_thread_index(); + ASSERT( idx < (my_max_concurrency > 1 ? 
my_max_concurrency : 2), NULL ); + if ( local_id.local() != 1 ) { + // Worker thread + ASSERT( idx >= my_reserved_slots, NULL ); + my_worker_barrier.timed_wait( 10 ); + } else { + ASSERT( idx < my_reserved_slots, "Masters are not supposed to occupy non-reserved slots in this test" ); + } + my_barrier.timed_wait( 10 ); + } +}; + +void TestArenaReservedMasterSlots( int p ) { + for ( int reserved_slots = 0; reserved_slots <= p; ++reserved_slots ) { + tbb::task_arena a( p, reserved_slots ); + Harness::SpinBarrier barrier(p); + Harness::SpinBarrier worker_barrier( p - reserved_slots + 1 ); + TestArenaReservedMasterSlotsBody test( a, barrier, worker_barrier, p, reserved_slots ); + for ( int i = reserved_slots; i < p; ++i ) + a.enqueue( test ); + worker_barrier.timed_wait( 10 ); + if ( reserved_slots ) + NativeParallelFor( reserved_slots, test ); + a.debug_wait_until_empty(); + ResetTLS(); + } +} + +struct test_functor_t { + void operator()() { ASSERT( false, "Non-const operator called" ); } + void operator()() const { /* library requires this overload only */ } +}; + +void TestConstantFunctorRequirement() { + tbb::task_arena a; + test_functor_t tf; + a.enqueue( tf ); +#if __TBB_TASK_PRIORITY + a.enqueue( tf, tbb::priority_normal ); +#endif +} + int TestMain () { // TODO: a workaround for temporary p-1 issue in market tbb::task_scheduler_init init_market_p_plus_one(MaxThread+1); @@ -480,7 +619,10 @@ int TestMain () { ResetTLS(); TestMultipleMasters( p ); ResetTLS(); + TestArenaMaxParallelism( p ); } TestArenaEntryConsistency(); + TestArenaReservedMasterSlots( MaxThread ); + TestConstantFunctorRequirement(); return Harness::Done; } diff --git a/src/test/test_task_group.cpp b/src/test/test_task_group.cpp index 5d60f34410..02d1c4cb73 100644 --- a/src/test/test_task_group.cpp +++ b/src/test/test_task_group.cpp @@ -810,6 +810,18 @@ void TestStructuredWait () { sg.wait(); } +struct test_functor_t { + void operator()() { ASSERT( false, "Non-const operator called" ); } + void operator()() const { /* library requires this overload only */ } +}; + +void TestConstantFunctorRequirement() { + tbb::task_group g; + test_functor_t tf; + g.run( tf ); g.wait(); + g.run_and_wait( tf ); +} + int TestMain () { REMARK ("Testing %s task_group functionality\n", TBBTEST_USE_TBB ? 
"TBB" : "PPL"); for( int p=MinThread; p<=MaxThread; ++p ) { @@ -855,6 +867,7 @@ int TestMain () { s->Release(); #endif } + TestConstantFunctorRequirement(); #if __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN REPORT("Known issue: exception handling tests are skipped.\n"); #endif diff --git a/src/test/test_task_priority.cpp b/src/test/test_task_priority.cpp index 033ad4cc2f..5fbda70326 100644 --- a/src/test/test_task_priority.cpp +++ b/src/test/test_task_priority.cpp @@ -538,6 +538,24 @@ void TestSetPriority() { delete g_trees[t][i]; } }//namespace test_propagation + +namespace regression { +// This is a regression test for a bug with task_group_context used from a thread that created its local scheduler but not the implicit arena +class TestTGContext { +public: + void operator() (int) const { + tbb::task_group_context ctx; + ctx.cancel_group_execution(); // initializes the local weak scheduler on the thread + ctx.set_priority(tbb::priority_high); + } +}; + +void TestTGContextOnNewThread() { + REMARK("Testing a regression for a bug with task_group_context\n"); + TestTGContext body; + NativeParallelFor(1, body); +} +}//namespace regression_priorities #endif /* __TBB_TASK_PRIORITY */ #if !__TBB_TEST_SKIP_AFFINITY @@ -573,6 +591,7 @@ int TestMain () { TestPrioritySwitchBetweenTwoMasters(); PreemptionActivatorId = 1; TestPrioritySwitchBetweenTwoMasters(); + regression::TestTGContextOnNewThread(); return Harness::Done; } diff --git a/src/test/test_tbb_fork.cpp b/src/test/test_tbb_fork.cpp index 823c51f601..4fb1934116 100644 --- a/src/test/test_tbb_fork.cpp +++ b/src/test/test_tbb_fork.cpp @@ -106,8 +106,8 @@ class RunWorkersBody : NoAssign { RunWorkersBody(bool waitWorkers) : wait_workers(waitWorkers) {} void operator()(const int /*threadID*/) const { tbb::task_scheduler_init sch(MaxThread, 0, wait_workers); - tbb::parallel_for(tbb::blocked_range(0, 10000, 1), AllocTask(), - tbb::simple_partitioner()); + tbb::parallel_for(tbb::blocked_range(0, 10000, 1), AllocTask(), + tbb::simple_partitioner()); } }; @@ -120,11 +120,35 @@ void TestBlockNonblock() } } +class RunInNativeThread : NoAssign { + bool create_tsi; +public: + RunInNativeThread(bool create_tsi_) : create_tsi(create_tsi_) {} + void operator()(const int /*threadID*/) const { + // nested TSI or auto-initialized TSI can be terminated when + // wait_workers is true (deferred TSI means auto-initialization) + tbb::task_scheduler_init tsi(create_tsi? 
2 : + tbb::task_scheduler_init::deferred); + tbb::parallel_for(tbb::blocked_range(0, 10000, 1), AllocTask(), + tbb::simple_partitioner()); + } +}; + +void TestTasksInThread() +{ + tbb::task_scheduler_init sch(2, 0, /*wait_workers=*/true); + tbb::parallel_for(tbb::blocked_range(0, 10000, 1), AllocTask(), + tbb::simple_partitioner()); + for (int i=0; i<2; i++) + NativeParallelFor(2, RunInNativeThread(/*create_tsi=*/1==i)); +} + int TestMain() { using namespace Harness; TestBlockNonblock(); + TestTasksInThread(); bool child = false; #if _WIN32||_WIN64 diff --git a/src/test/test_tbb_version.cpp b/src/test/test_tbb_version.cpp index a2f223917f..0d08b1c71d 100644 --- a/src/test/test_tbb_version.cpp +++ b/src/test/test_tbb_version.cpp @@ -238,7 +238,7 @@ int main(int argc, char *argv[] ) { void initialize_strings_vector(std::vector * vector) { vector->push_back(string_pair("TBB: VERSION\t\t4.4", required)); // check TBB_VERSION - vector->push_back(string_pair("TBB: INTERFACE VERSION\t9000", required)); // check TBB_INTERFACE_VERSION + vector->push_back(string_pair("TBB: INTERFACE VERSION\t9001", required)); // check TBB_INTERFACE_VERSION vector->push_back(string_pair("TBB: BUILD_DATE", required)); vector->push_back(string_pair("TBB: BUILD_HOST", required)); vector->push_back(string_pair("TBB: BUILD_OS", required));
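
Note (not part of the patch): the new test_task_arena.cpp cases above (MultipleMastersPart5, TestArenaMaxParallelism, TestArenaReservedMasterSlots) all exercise the same basic pattern: a task_arena is constructed with an explicit concurrency limit and a number of slots reserved for application threads, several native threads join it through execute(), and TBB workers occupy the remaining slots. A minimal sketch of that pattern outside the test harness might look as follows; the concurrency values and the loop body are illustrative assumptions, not values taken from the patch.

    #include <thread>
    #include <vector>
    #include "tbb/task_arena.h"
    #include "tbb/parallel_for.h"

    int main() {
        const int max_concurrency = 4;       // arena concurrency limit (assumed value)
        const int reserved_for_masters = 2;  // slots kept for application threads (assumed value)
        tbb::task_arena arena(max_concurrency, reserved_for_masters);

        std::vector<std::thread> masters;
        for (int i = 0; i < reserved_for_masters; ++i) {
            masters.push_back(std::thread([&arena] {
                // Each application thread joins the arena and executes work there;
                // worker threads fill the remaining (non-reserved) slots concurrently.
                arena.execute([] {
                    tbb::parallel_for(0, 1000, [](int) { /* per-index work */ });
                });
            }));
        }
        for (std::thread &t : masters)
            t.join();
        return 0;
    }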
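
Note (not part of the patch): the TestConstantFunctorRequirement cases added to test_task_arena.cpp and test_task_group.cpp check that the library calls only the const call operator of a user functor (the non-const overload deliberately asserts). From the user's side, a functor passed to task_group::run_and_wait() or task_arena::enqueue() must therefore provide operator()() const. A minimal sketch under that assumption; the name Work and its body are illustrative only.

    #include "tbb/task_group.h"
    #include "tbb/task_arena.h"

    struct Work {
        // Only the const overload is required (and invoked) by the library;
        // a functor whose operator()() is non-const will fail to compile
        // when passed to run_and_wait() or task_arena::enqueue().
        void operator()() const { /* do work */ }
    };

    int main() {
        Work w;

        tbb::task_group g;
        g.run_and_wait(w);   // compiles because Work::operator()() is const

        tbb::task_arena a;
        a.enqueue(w);        // same requirement; enqueue is fire-and-forget,
                             // so a real program would synchronize before exiting
        return 0;
    }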