[tbb] 40/64: Imported Upstream version 4.2~20130725

Graham Inggs ginggs at moszumanska.debian.org
Mon Jul 3 12:28:01 UTC 2017


This is an automated email from the git hooks/post-receive script.

ginggs pushed a commit to branch master
in repository tbb.

commit 7a5ba16f6d2a61625e835c3490782b9e36a11fd7
Author: Graham Inggs <ginggs at debian.org>
Date:   Mon Jul 3 14:13:57 2017 +0200

    Imported Upstream version 4.2~20130725
---
 CHANGES                                            |  54 ++-
 Makefile                                           |  24 +-
 build/Makefile.rml                                 |   4 +
 build/Makefile.tbb                                 |   6 +-
 build/Makefile.tbbmalloc                           |  10 +-
 build/Makefile.test                                |  23 +-
 build/android.linux.launcher.sh                    |  87 ++--
 build/common.inc                                   |  19 +-
 build/common_rules.inc                             |  21 +
 build/linux.inc                                    |   3 +-
 build/macos.clang.inc                              |   4 +-
 build/macos.icc.inc                                |   7 +
 build/macos.inc                                    |   6 +-
 build/mic.linux.inc                                |   2 +-
 build/mic.linux.launcher.sh                        |  41 +-
 build/mic.offload.inc                              |   4 +-
 build/test_launcher.bat                            |  35 +-
 build/test_launcher.sh                             |  35 +-
 build/vsproject/tbb.vcproj                         | 104 ++++-
 build/windows.cl.inc                               |   4 +-
 build/windows.icl.inc                              |   4 +-
 doc/Release_Notes.txt                              |  15 +-
 doc/html/a00019.html                               |   2 +
 doc/html/a00127.html                               |   2 +-
 doc/html/a00281.html                               |   8 +
 doc/html/a00343.html                               |   3 +-
 doc/html/a00405.html                               |  31 +-
 doc/html/a00425.html                               |  12 +-
 doc/html/a00428.html                               |   8 +
 doc/html/a00443.html                               |  31 ++
 doc/html/a00444.html                               |  23 +
 doc/html/functions_0x63.html                       |   2 +-
 doc/html/functions_0x69.html                       |   3 +-
 doc/html/functions_0x6f.html                       |   2 +-
 doc/html/functions_func_0x63.html                  |   2 +-
 doc/html/functions_func_0x69.html                  |   3 +-
 doc/html/functions_func_0x6f.html                  |   2 +-
 doc/html/globals.html                              |   3 +-
 doc/html/globals_func.html                         |   3 +-
 doc/html/namespacemembers.html                     |   4 +-
 doc/html/namespacemembers_enum.html                |   1 +
 doc/html/namespacemembers_eval.html                |   1 +
 doc/html/namespacemembers_func.html                |   1 +
 ...embers_enum.html => namespacemembers_type.html} |   8 +-
 examples/GettingStarted/sub_string_finder/Makefile |  15 +-
 .../msvs/sub_string_finder.vcproj                  |   8 +-
 .../msvs/sub_string_finder_extended.vcproj         |   8 +-
 .../msvs/sub_string_finder_pretty.vcproj           |   8 +-
 examples/Makefile                                  |   5 +-
 examples/common/gui/Makefile.win                   |   2 +-
 .../count_strings/msvs/count_strings.vcproj        |   8 +-
 .../shortpath/msvs/shortpath.vcproj                |   8 +-
 examples/graph/binpack/msvs/binpack.vcproj         |   8 +-
 .../msvs/dining_philosophers.vcproj                |   8 +-
 examples/graph/logic_sim/msvs/test_all.vcproj      |   8 +-
 examples/parallel_do/parallel_preorder/Makefile    |   4 +
 .../parallel_do/parallel_preorder/Makefile.windows |   3 +
 .../msvs/parallel_preorder.vcproj                  |   8 +-
 .../game_of_life/msvs/Game_of_life.vcproj          |  16 +-
 .../parallel_for/polygon_overlay/msvs/pover.vcproj |  16 +-
 examples/parallel_for/seismic/Makefile             |  30 +-
 examples/parallel_for/seismic/main.cpp             |  38 +-
 .../seismic/msvs/SeismicSimulation.vcproj          |  16 +-
 examples/parallel_for/seismic/universe.cpp         |   4 -
 examples/parallel_for/tachyon/Makefile.windows     |  51 +-
 .../parallel_for/tachyon/msvs/tachyon.tbb.vcproj   |  16 +-
 .../parallel_for/tachyon/msvs/tachyon.tbb1d.vcproj |  16 +-
 .../convex_hull/msvs/convex_hull_benchmark.vcproj  |   8 +-
 .../convex_hull/msvs/convex_hull_sample.vcproj     |   8 +-
 examples/parallel_reduce/primes/Makefile           |  15 +-
 examples/parallel_reduce/primes/msvs/primes.vcproj |   8 +-
 examples/pipeline/square/msvs/square.vcproj        |   8 +-
 examples/task/tree_sum/msvs/tree_sum.vcproj        |   8 +-
 examples/task_group/sudoku/Makefile                |  15 +-
 examples/task_group/sudoku/msvs/sudoku.vcproj      |   8 +-
 examples/task_priority/fractal/msvs/fractal.vcproj |  16 +-
 examples/test_all/fibonacci/msvs/fibonacci.vcproj  |   8 +-
 include/tbb/concurrent_hash_map.h                  |  24 +
 include/tbb/concurrent_unordered_map.h             |  40 ++
 include/tbb/concurrent_unordered_set.h             |  38 ++
 include/tbb/concurrent_vector.h                    |   8 +-
 include/tbb/flow_graph.h                           |  31 +-
 include/tbb/internal/_concurrent_unordered_impl.h  |  15 +
 include/tbb/internal/_flow_graph_join_impl.h       |   8 +-
 include/tbb/internal/_flow_graph_node_impl.h       |   8 +-
 include/tbb/internal/_mutex_padding.h              |  76 +++
 include/tbb/internal/_x86_eliding_mutex_impl.h     | 157 +++++++
 include/tbb/machine/gcc_generic.h                  |   4 +
 include/tbb/machine/gcc_ia32_common.h              |   2 +
 include/tbb/machine/{mic_common.h => gcc_itsx.h}   |  60 ++-
 include/tbb/machine/icc_generic.h                  |   3 +-
 include/tbb/machine/linux_intel64.h                |   1 +
 include/tbb/machine/mic_common.h                   |   4 +-
 include/tbb/machine/msvc_ia32_common.h             |  13 +-
 include/tbb/memory_pool.h                          |   2 +
 include/tbb/parallel_reduce.h                      |   6 +-
 include/tbb/scalable_allocator.h                   |  47 +-
 include/tbb/spin_mutex.h                           |  49 +-
 include/tbb/task.h                                 |   9 +-
 include/tbb/tbb_config.h                           | 114 +++--
 include/tbb/tbb_machine.h                          |  30 ++
 include/tbb/tbb_stddef.h                           |   4 +-
 src/Makefile                                       |  71 +--
 src/old/concurrent_vector_v2.h                     |   2 +-
 src/tbb/arena.cpp                                  |   2 +-
 src/tbb/dynamic_link.cpp                           |  80 +++-
 src/tbb/dynamic_link.h                             |  21 +-
 src/tbb/governor.cpp                               |   2 +-
 .../ia32-masm/itsx.asm}                            |  48 +-
 .../intel64-masm/itsx.asm}                         |  44 +-
 src/tbb/market.cpp                                 |   4 +-
 src/tbb/market.h                                   |   2 +-
 src/tbb/scheduler.h                                |   2 +-
 src/tbb/task.cpp                                   |   5 +-
 src/tbb/task_group_context.cpp                     |  57 ++-
 src/tbb/win32-tbb-export.lst                       |   2 +
 src/tbb/win64-tbb-export.lst                       |   2 +
 src/tbbmalloc/Customize.h                          |  21 +-
 src/tbbmalloc/frontend.cpp                         | 519 ++++++++++++---------
 src/tbbmalloc/large_objects.cpp                    |  25 +-
 src/tbbmalloc/lin32-tbbmalloc-export.def           |   1 +
 src/tbbmalloc/lin64-tbbmalloc-export.def           |   1 +
 src/tbbmalloc/lin64ipf-tbbmalloc-export.def        |   1 +
 src/tbbmalloc/mac32-tbbmalloc-export.def           |   1 +
 src/tbbmalloc/mac64-tbbmalloc-export.def           |   1 +
 src/tbbmalloc/tbbmalloc_internal.h                 |  37 +-
 src/tbbmalloc/win32-gcc-tbbmalloc-export.def       |   1 +
 src/tbbmalloc/win32-tbbmalloc-export.def           |   1 +
 src/tbbmalloc/win64-gcc-tbbmalloc-export.def       |   1 +
 src/tbbmalloc/win64-tbbmalloc-export.def           |   1 +
 src/tbbproxy/tbbproxy.cpp                          |   2 +-
 src/test/harness.h                                 | 198 ++++----
 src/test/harness_defs.h                            |   4 +
 src/test/harness_tsx.h                             |  89 ++++
 src/test/test_ScalableAllocator.cpp                |   9 +-
 src/test/test_ScalableAllocator_STL.cpp            |   2 +-
 src/test/test_atomic.cpp                           |   7 +-
 src/test/test_buffer_node.cpp                      |   2 +-
 src/test/test_cilk_interop.cpp                     |   1 +
 src/test/test_concurrent_hash_map.cpp              |  19 +
 src/test/test_concurrent_priority_queue.cpp        |  20 +-
 src/test/test_concurrent_unordered.cpp             |  61 +++
 src/test/test_concurrent_vector.cpp                |  15 +-
 src/test/test_dynamic_link.cpp                     |  90 ++++
 src/test/test_eh_algorithms.cpp                    |  56 +++
 src/test/test_initializer_list.h                   | 160 +++++--
 src/test/test_malloc_compliance.cpp                |   2 +
 src/test/test_malloc_init_shutdown.cpp             |   2 +
 src/test/test_malloc_pools.cpp                     |  25 +
 src/test/test_malloc_pure_c.c                      |  20 +-
 src/test/test_malloc_whitebox.cpp                  |  78 ++--
 src/test/test_mutex.cpp                            |  65 ++-
 src/test/test_priority_queue_node.cpp              |   2 +-
 src/test/test_queue_node.cpp                       |   2 +-
 src/test/test_sequencer_node.cpp                   |   2 +-
 src/test/test_task_group.cpp                       |   7 +-
 src/test/test_task_priority.cpp                    |  92 +++-
 src/test/test_tbb_version.cpp                      |   4 +-
 158 files changed, 2714 insertions(+), 1103 deletions(-)

diff --git a/CHANGES b/CHANGES
index 496693a..0479ff2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,8 +1,58 @@
 ------------------------------------------------------------------------
 The list of most significant changes made over time in
-Intel(R) Threading Bulding Blocks (Intel(R) TBB).
-------------------------------------------------------------------------
+Intel(R) Threading Building Blocks (Intel(R) TBB).
+------------------------------------------------------------------------
+
+Intel TBB 4.2
+TBB_INTERFACE_VERSION == 7000
+
+Changes (w.r.t. Intel TBB 4.1 Update 4):
+
+- Added speculative_spin_mutex, which uses Intel(R) Transactional
+    Synchronization Extensions when they are supported by hardware.
+- Binary files linked with libc++ (the C++ standard library in Clang)
+    were added on OS X*.
+- For OS X* exact exception propagation is supported with Clang;
+    it requires use of libc++ and corresponding Intel TBB binaries.
+- Support for C++11 initializer lists in constructor and assignment
+    has been added to concurrent_hash_map, concurrent_unordered_set,
+    concurrent_unordered_multiset, concurrent_unordered_map,
+    concurrent_unordered_multimap.
+- The memory allocator may now clean its per-thread memory caches
+    when it cannot get more memory.
+- Added the scalable_allocation_command() function for on-demand
+    cleaning of internal memory caches.
+- Reduced the time overhead for freeing memory objects smaller than ~8K.
+- Simplified linking with the debug library for applications that use
+    Intel TBB in code offloaded to Intel(R) Xeon Phi(tm) coprocessors.
+    See an example in
+    examples/GettingStarted/sub_string_finder/Makefile.
+- Various improvements in source code, scripts and makefiles.
+
+Changes affecting backward compatibility:
+
+- tbb::flow::graph has been modified to spawn its tasks;
+    the old behaviour (task enqueuing) is deprecated. This change may
+    impact applications that expected a flow graph to make progress
+    without calling wait_for_all(), which is no longer guaranteed. See
+    the documentation for more details.
+- Changed the return values of the scalable_allocation_mode() function.
+
+Bugs fixed:
 
+- Fixed a leak of parallel_reduce body objects when execution is
+    cancelled or an exception is thrown, as suggested by Darcy Harrison.
+- Fixed a race in the task scheduler which can lower the effective
+    priority despite the existence of higher priority tasks.
+- On Linux an error during destruction of the internal thread local
+    storage no longer results in an exception.
+
+Open-source contributions integrated:
+
+- Fixed task_group_context state propagation to unrelated context trees
+    by Raf Schietekat.
+
+------------------------------------------------------------------------
 Intel TBB 4.1 Update 4
 TBB_INTERFACE_VERSION == 6105
 
diff --git a/Makefile b/Makefile
index 38dc03f..d2e34fb 100644
--- a/Makefile
+++ b/Makefile
@@ -37,26 +37,26 @@ default: tbb tbbmalloc $(if $(use_proxy),tbbproxy)
 all: tbb tbbmalloc tbbproxy test examples
 
 tbb: mkdir
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbb cfg=debug tbb_root=$(tbb_root)
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbb cfg=release tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbb cfg=debug
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbb cfg=release
 
 tbbmalloc: mkdir
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc tbb_root=$(tbb_root)
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc
 
 tbbproxy: mkdir
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=debug tbbproxy tbb_root=$(tbb_root)
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=release tbbproxy tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=debug tbbproxy
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=release tbbproxy
 
 test: tbb tbbmalloc $(if $(use_proxy),tbbproxy)
-	-$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_test tbb_root=$(tbb_root)
-	-$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.test cfg=debug tbb_root=$(tbb_root)
-	-$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_test tbb_root=$(tbb_root)
-	-$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.test cfg=release tbb_root=$(tbb_root)
+	-$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_test
+	-$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.test cfg=debug
+	-$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_test
+	-$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.test cfg=release
 
 rml: mkdir
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.rml cfg=debug tbb_root=$(tbb_root)
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.rml cfg=release tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.rml cfg=debug
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.rml cfg=release
 
 
 examples: tbb tbbmalloc
diff --git a/build/Makefile.rml b/build/Makefile.rml
index 1f7ddba..12268af 100644
--- a/build/Makefile.rml
+++ b/build/Makefile.rml
@@ -30,6 +30,10 @@ TEST_RESOURCE = $(RML.RES)
 include $(tbb_root)/build/common.inc
 DEBUG_SUFFIX=$(findstring _debug,_$(cfg))
 
+ifeq (android,$(target))
+$(error "RML is not supported on Android")
+endif
+
 # default target
 default_rml: rml rml_test
 
diff --git a/build/Makefile.tbb b/build/Makefile.tbb
index e384504..452882e 100644
--- a/build/Makefile.tbb
+++ b/build/Makefile.tbb
@@ -70,9 +70,6 @@ TBB_CPLUS.OBJ = concurrent_hash_map.$(OBJ) \
 		spin_rw_mutex.$(OBJ) \
 		spin_mutex.$(OBJ) \
 		critical_section.$(OBJ) \
-		task.$(OBJ) \
-		tbb_misc.$(OBJ) \
-		tbb_misc_ex.$(OBJ) \
 		mutex.$(OBJ) \
 		recursive_mutex.$(OBJ) \
 		condition_variable.$(OBJ) \
@@ -81,6 +78,9 @@ TBB_CPLUS.OBJ = concurrent_hash_map.$(OBJ) \
 		semaphore.$(OBJ) \
 		private_server.$(OBJ) \
 		rml_tbb.$(OBJ) \
+		tbb_misc.$(OBJ) \
+		tbb_misc_ex.$(OBJ) \
+		task.$(OBJ) \
 		task_group_context.$(OBJ) \
 		governor.$(OBJ) \
 		market.$(OBJ) \
diff --git a/build/Makefile.tbbmalloc b/build/Makefile.tbbmalloc
index 49d6a97..3d317a9 100644
--- a/build/Makefile.tbbmalloc
+++ b/build/Makefile.tbbmalloc
@@ -175,7 +175,7 @@ $(MALLOC_MAIN_TESTS): %.$(TEST_EXT): %.$(OBJ) $(MALLOC_LIB)
 MALLOC_C_TESTS = test_malloc_pure_c.$(TEST_EXT)
 
 $(MALLOC_C_TESTS): %.$(TEST_EXT): %.$(OBJ) $(MALLOC_LIB)
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $^ $(LINK_MALLOC_LIB) $(LIBS) $(LINK_FLAGS)
+	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $< $(LINK_MALLOC_LIB) $(LIBS) $(LINK_FLAGS)
 
 # Rules for generating a test DLL
 %_dll.$(DLL): %_dll.$(OBJ)
@@ -197,13 +197,13 @@ malloc_test_no_depends: $(TEST_PREREQUISITE) $(MALLOC_TESTS)
 	$(run_cmd) ./test_malloc_pools.$(TEST_EXT) $(args) 1:4
 ifneq (,$(MALLOCPROXY.DLL))
 	$(run_cmd) ./test_malloc_atexit.$(TEST_EXT) $(args)
-	$(run_cmd) $(TEST_LAUNCHER) test_malloc_lib_unload.$(TEST_EXT) $(args)
-	$(run_cmd) $(TEST_LAUNCHER) -l $(call cross_suffix,$(MALLOCPROXY.DLL)) test_malloc_overload.$(TEST_EXT) $(args)
-	$(run_cmd) $(TEST_LAUNCHER) test_malloc_overload_proxy.$(TEST_EXT) $(args)
+	$(run_cmd) $(TEST_LAUNCHER) ./test_malloc_lib_unload.$(TEST_EXT) $(args)
+	$(run_cmd) $(TEST_LAUNCHER) -l $(call cross_suffix,$(MALLOCPROXY.DLL)) ./test_malloc_overload.$(TEST_EXT) $(args)
+	$(run_cmd) $(TEST_LAUNCHER) ./test_malloc_overload_proxy.$(TEST_EXT) $(args)
 endif
 	$(run_cmd) ./test_malloc_used_by_lib.$(TEST_EXT)
 	$(run_cmd) ./test_malloc_whitebox.$(TEST_EXT) $(args) 1:4
-	$(run_cmd) $(TEST_LAUNCHER) -u test_malloc_compliance.$(TEST_EXT) $(args) 1:4
+	$(run_cmd) $(TEST_LAUNCHER) -u ./test_malloc_compliance.$(TEST_EXT) $(args) 1:4
 	$(run_cmd) ./test_ScalableAllocator.$(TEST_EXT) $(args)
 	$(run_cmd) ./test_ScalableAllocator_STL.$(TEST_EXT) $(args)
 	$(run_cmd) ./test_malloc_regression.$(TEST_EXT) $(args)
diff --git a/build/Makefile.test b/build/Makefile.test
index e9b264c..3c28e18 100644
--- a/build/Makefile.test
+++ b/build/Makefile.test
@@ -124,11 +124,15 @@ test_atomic_pic.$(OBJ): CPLUS_FLAGS+=$(DEFINE_KEY)__TBB_TEST_PIC=1
 test_atomic_pic.$(OBJ): test_atomic.cpp
 	$(call make-cxx-obj,$@,$<,)
 
-#Test of generic gcc port and icc intrinsics port
+# Test of generic gcc port and icc intrinsics port
 %_compiler_builtins.$(OBJ): CPLUS_FLAGS+=$(DEFINE_KEY)__TBB_TEST_BUILTINS=1
 %_compiler_builtins.$(OBJ): %.cpp
 	$(call make-cxx-obj,$@,$<,)
 
+# The test_dynamic_link test doesn't depend on the TBB library
+test_dynamic_link.$(TEST_EXT): LINK_TBB.LIB =
+test_dynamic_link.$(TEST_EXT): TEST_USES_TBB=0
+test_dynamic_link.$(TEST_EXT): LIBS += $(LIBDL)
 
 # The main list of TBB tests
 TEST_TBB_PLAIN.EXE = test_assembly.$(TEST_EXT)   \
@@ -214,6 +218,7 @@ TEST_TBB_PLAIN.EXE = test_assembly.$(TEST_EXT)   \
 	test_aggregator.$(TEST_EXT)                  \
 	test_concurrent_lru_cache.$(TEST_EXT)        \
 	test_examples_common_utility.$(TEST_EXT)     \
+	test_dynamic_link.$(TEST_EXT)                \
 	test_tbb_version.$(TEST_EXT)                 # insert new files right above
 
 TEST_TBB_PLAIN.EXE += $(TEST_TBB_CPP11)
@@ -258,6 +263,7 @@ test_tbb_plain: $(TEST_PREREQUISITE) $(SCHEDULER_DIRECTLY_INCLUDED) $(TEST_TBB_P
         # Yes, 4:8 is intended on the next line.
 	$(run_cmd) ./test_yield.$(TEST_EXT) $(args) 4:8
 	$(run_cmd) ./test_handle_perror.$(TEST_EXT) $(args)
+	$(run_cmd) ./test_dynamic_link.$(TEST_EXT) $(args)
 	$(run_cmd) ./test_task_auto_init.$(TEST_EXT) $(args)
 	$(run_cmd) ./test_task_arena.$(TEST_EXT) $(args)
 	$(run_cmd) ./test_task_scheduler_init.$(TEST_EXT) $(args) 1:4
@@ -382,21 +388,6 @@ codecov_gen:
 	codecov $(if $(findstring -,$(codecov)),$(codecov),) -demang -comp $(tbb_root)/build/codecov.txt
 endif
 
-test_% debug_%: test_%.$(TEST_EXT) $(TEST_PREREQUISITE)
-ifeq (,$(repeat))
-	$(run_cmd) ./$< $(args)
-else
-ifeq (windows,$(tbb_os))
-	for /L %%i in (1,1,$(repeat)) do echo %%i of $(repeat): && $(run_cmd) $< $(args)
-else
-	for ((i=1;i<=$(repeat);++i)); do echo $$i of $(repeat): && $(run_cmd) ./$< $(args); done
-endif
-endif # repeat
-ifneq (,$(codecov))
-	profmerge
-	codecov $(if $(findstring -,$(codecov)),$(codecov),) -demang -comp $(tbb_root)/build/codecov.txt
-endif
-
 time_%: time_%.$(TEST_EXT) $(TEST_PREREQUISITE)
 	$(run_cmd) ./$< $(args)
 
diff --git a/build/android.linux.launcher.sh b/build/android.linux.launcher.sh
index cb23f7b..758c472 100644
--- a/build/android.linux.launcher.sh
+++ b/build/android.linux.launcher.sh
@@ -27,10 +27,13 @@
 # the GNU General Public License.
 
 # Usage:
-# android.linux.launcher.sh [-v] [-u] [-l <library>] <executable> <arg1> <arg2> <argN>
-#         where: -l <library> specfies the library name to be assigned to LD_PRELOAD
-#         where: -v enables verbose output when running the test (where supported)
+# android.linux.launcher.sh [-v] [-q] [-s] [-r <repeats>] [-u] [-l <library>] <executable> <arg1> <arg2> <argN>
+#         where: -v enables verbose output
+#         where: -q enables quiet mode
+#         where: -s runs the test in stress mode (until non-zero exit code or ctrl-c pressed)
+#         where: -r <repeats> specifies number of times to repeat execution
 #         where: -u is ignored on Android
+#         where: -l <library> specifies the library name to be assigned to LD_PRELOAD
 #
 # Libs and executable necessary for testing should be present in the current directory before running.
 # ANDROID_SERIAL must be set to the connected Android target device name for file transfer and test runs.
@@ -51,21 +54,28 @@ do_trap_cleanup()
     exit -1
 }
 
-# Process the optional arguments if present
-if [ "x$1" = "x-v" ]; then {
-    verb="$1"
-    shift 1
-}; fi
-
-if [ "x$1" = "x-u" ]; then {
-    shift 1
-}; fi
-
-if [ "x$1" = "x-l" ]; then {
-    ldpreload="$2"
-    shift 2
-}; fi
-
+while getopts  "qvsr:ul:" flag #
+do case $flag in #
+    s )  # Stress testing mode
+         echo Doing stress testing. Press Ctrl-C to terminate
+         run_env='stressed() { while $*; do :; done; }; ' #
+         run_prefix="$run_prefix stressed" ;; #
+    r )  # Repeats test n times
+         run_env="repeated() { for i in $(seq -s ' ' 1 $OPTARG) ; do echo \$i of $OPTARG:; \$*; done; }; " #
+         run_prefix="$run_prefix repeated" ;; #
+    l )  # Additional library
+         ldpreload="$OPTARG " ;; #
+    u )  # Stack limit
+         ;; # 
+    q )  # Quiet mode, removes 'done' but prefixes any other output with the test name
+         OUTPUT='2>&1 | sed -e "s/done//;/^[[:space:]]*$/d;s!^!$exename: !"' ;; #
+    v )  # Verbose mode
+         SUPPRESS='' #
+         verbose=1 ;; #
+esac done #
+shift `expr $OPTIND - 1` #
+[ -z "$OUTPUT" ] && OUTPUT='| sed -e "s/\\r$//"'
+[ $verbose ] || SUPPRESS='>/dev/null'
 # Collect the executable name
 exename=$(basename $1)
 shift
@@ -86,27 +96,22 @@ else
 	fnamelist="$fnamelist libgnustl_shared.so"
 fi
 
+# Make ldpreload list
+mallocfiles="$(/bin/ls libtbbmalloc* 2>/dev/null)"
+#TODO: find a better workaround than calling echo
+#(without echo there is an error: /system/bin/sh: libtbbmalloc_proxy.so: not found)
+[ -z "$mallocfiles" ] || ldpreload="$ldpreload `echo $mallocfiles`"
+[ -z "$ldpreload" ] || run_prefix="LD_PRELOAD='$ldpreload' $run_prefix"
+
 # Find the TBB libraries and add them to the list.
 # Add TBB libraries from the current directory that contains libtbb* files
-
 files="$(/bin/ls libtbb* 2> /dev/null)"
-if [ ! -z "$files" ]; then fnamelist="$fnamelist $files"; fi
-
-mallocfiles="$(/bin/ls libtbbmalloc* 2> /dev/null)"
-if [ ! -z "$mallocfiles" ]; then {
-    #TODO: any better workaround instead calling echo 
-    #(without echo there is error: /system/bin/sh: libtbbmalloc_proxy.so: not found)
-    ldpreload="$ldpreload $(echo $mallocfiles)"
-}; fi
-
-if [ ! -z "$ldpreload" ]; then ldpreload="export LD_PRELOAD=$ldpreload;"; fi
+[ -z "$files" ] || fnamelist="$fnamelist $files"
 
 # Add any libraries built for specific tests.
 exeroot=${exename%\.*}
 files="$(/bin/ls ${exeroot}*.so ${exeroot}*.so.* 2> /dev/null)"
-if [ ! -z "$files" ]; then {
-    fnamelist="$fnamelist $files" 
-}; fi
+[ -z "$files" ] || fnamelist="$fnamelist $files"
 
 # TODO: Add extra libraries from the Intel(R) Compiler for certain tests
 # found=$(echo $exename | egrep 'test_malloc_atexit\|test_malloc_lib_unload' 2> /dev/null)
@@ -121,7 +126,8 @@ transfers_ok=1
 for fullname in $fnamelist; do {
     if [ -r $fullname ]; then {
         # Transfer the executable and libraries to top-level target directory
-        adb push $fullname ${targetdir}/$(basename $fullname) > /dev/null 2>&1
+        [ $verbose ] && echo -n "Pushing $fullname: "
+        eval "adb push $fullname ${targetdir}/$(basename $fullname) $SUPPRESS 2>&1"
     }; else {
         echo "Error: required file ${currentdir}/${fullname} for test $exename not available for transfer."
         transfers_ok=0
@@ -145,20 +151,25 @@ for fullname in "$@"; do {
         }; fi
         # Create the target directory to hold input file if necessary
         if [ ! -z $directory ]; then {
-            adb shell "mkdir $directory" > /dev/null 2>&1
+            eval "adb shell 'mkdir $directory' $SUPPRESS 2>&1"
         }; fi
         # Transfer the input file to corresponding directory on target device
-        adb push $fullname ${targetdir}/$fullname > /dev/null 2>&1
+        [ $verbose ] && echo -n "Pushing $fullname: "
+        eval "adb push $fullname ${targetdir}/$fullname $SUPPRESS 2>&1"
     }; fi
 }; done
 
+[ $verbose ] && echo Running $run_prefix ./$exename $*
+run_env="$run_env cd $targetdir; export LD_LIBRARY_PATH=."
+
 # The return_code file is the best way found to return the status of the test execution when using adb shell.
-(adb shell "cd $targetdir; $ldpreload export LD_LIBRARY_PATH=.; ./$exename $verb $*; echo \$? > return_code") | sed -e "s/\\r$//"
+eval 'adb shell "$run_env; $run_prefix ./$exename $* || echo -n \$? >error_code"' "${OUTPUT}" #
 
 # Capture the return code string and remove the trailing \r from the return_code file contents
-exitcode=`(adb shell "cat $targetdir/return_code 2> /dev/null") | sed -e "s/\\r$//"`
+err=`adb shell "cat $targetdir/error_code 2>/dev/null"`
+[ -z $err ] || echo $exename: exited with error $err
 
 do_cleanup
 
 # Return the exit code of the test.
-exit $exitcode
+exit $err
diff --git a/build/common.inc b/build/common.inc
index 0a2d812..8a0057b 100644
--- a/build/common.inc
+++ b/build/common.inc
@@ -109,7 +109,7 @@ ifneq ($(BUILDING_PHASE),1)
  # definitions for top-level Makefiles
  origin_build_dir:=$(origin tbb_build_dir)
  tbb_build_dir?=$(tbb_root)$(SLASH)build
- tbb_build_prefix?=$(tbb_os)_$(arch)_$(compiler)_$(runtime)$(CPF_SUFFIX)
+ export tbb_build_prefix?=$(tbb_os)_$(arch)_$(compiler)_$(runtime)$(CPF_SUFFIX)
  work_dir=$(tbb_build_dir)$(SLASH)$(tbb_build_prefix)
 endif  # BUILDING_PHASE != 1
 
@@ -129,13 +129,26 @@ ifndef BUILDING_PHASE
   # assign new value for tbb_root if path is not absolute (the filter keeps only /* paths)
   ifeq ($(filter /% $(SLASH)%, $(subst :, ,$(tbb_root)) ),)
    ifeq ($(origin_build_dir),undefined)
+   # a relative path is needed here as a workaround to support whitespace in the path
     override tbb_root:=../..
    else
-    override tbb_root:=$(CWD)/$(tbb_root)
-   endif
+    override tbb_root:=$(CURDIR)/$(tbb_root)
   endif
   export tbb_root
+  endif
  endif # !BUILDING_PHASE
 
 .DELETE_ON_ERROR:    # Make will delete target if error occurred when building it.
 
+# MAKEOVERRIDES contains the command line variable definitions. Resetting it to
+# empty allows propagating all exported overridden variables to nested makes.
+# NOTEs:
+#   1. All variables set on the command line are propagated to nested makes.
+#   2. All variables declared with the "export" keyword are propagated to
+#   nested makes.
+#   3. "override" allows changing variables set in command line. But it doesn't
+#   propagate new values to nested makes. For propagation, the "export" keyword
+#   should be used.
+#   4. gmake v3.80 doesn't support exporting of target-specific variables using
+#   the "export" keyword
+MAKEOVERRIDES =
diff --git a/build/common_rules.inc b/build/common_rules.inc
index f00af86..3bee74b 100644
--- a/build/common_rules.inc
+++ b/build/common_rules.inc
@@ -32,6 +32,20 @@ ifeq ($(tbb_strict),1)
   WARNING_KEY += $(WARNING_AS_ERROR_KEY)
 endif
 
+ifneq (,$(findstring s,$(MAKEFLAGS)))
+  override largs+=-q
+endif
+ifneq (,$(repeat))
+  override largs+=-r $(repeat)
+endif
+ifneq (,$(largs))
+  override run_cmd:=$(run_cmd)$(TEST_LAUNCHER)
+  TEST_LAUNCHER=
+  ifeq (,$(run_cmd))
+    $(warning Test launcher is not defined for the platform, ignoring launcher arguments)
+  endif
+endif
+
 ifndef TEST_EXT
     TEST_EXT = exe
 endif
@@ -134,3 +148,10 @@ version_string.ver:
 	$(MAKE_VERSIONS)
 endif
 
+test_% debug_%: test_%.$(TEST_EXT) $(TEST_PREREQUISITE)
+	$(run_cmd) ./$< $(args)
+ifneq (,$(codecov))
+	profmerge
+	codecov $(if $(findstring -,$(codecov)),$(codecov),) -demang -comp $(tbb_root)/build/codecov.txt
+endif
+
diff --git a/build/linux.inc b/build/linux.inc
index 1c96d86..7f95cf0 100644
--- a/build/linux.inc
+++ b/build/linux.inc
@@ -98,6 +98,7 @@ endif
 
 OBJ = o
 DLL = so
+MALLOC_DLL?=$(DLL)
 LIBEXT = so
 SONAME_SUFFIX =$(shell grep TBB_COMPATIBLE_INTERFACE_VERSION $(tbb_root)/include/tbb/tbb_stddef.h | egrep -o [0-9.]+)
 
@@ -125,7 +126,7 @@ TBB.LIB = $(TBB.DLL)
 TBB_NO_VERSION.DLL=libtbb$(CPF_SUFFIX)$(DEBUG_SUFFIX).$(DLL)
 LINK_TBB.LIB = $(TBB_NO_VERSION.DLL)
 
-MALLOC_NO_VERSION.DLL = libtbbmalloc$(DEBUG_SUFFIX).$(DLL)
+MALLOC_NO_VERSION.DLL = libtbbmalloc$(DEBUG_SUFFIX).$(MALLOC_DLL)
 MALLOC.DEF = $(MALLOC_ROOT)/$(def_prefix)-tbbmalloc-export.def
 MALLOC.DLL = $(MALLOC_NO_VERSION.DLL).$(SONAME_SUFFIX)
 MALLOC.LIB = $(MALLOC_NO_VERSION.DLL)
diff --git a/build/macos.clang.inc b/build/macos.clang.inc
index e2347e2..cdb6f62 100644
--- a/build/macos.clang.inc
+++ b/build/macos.clang.inc
@@ -55,8 +55,8 @@ endif
 CPLUS_FLAGS += -DUSE_PTHREAD
 
 ifeq (libc++,$(stdlib))
-    LIBS += -stdlib=libc++
-    CPLUS_FLAGS += -stdlib=libc++
+    CPLUS_FLAGS    += -stdlib=libc++
+    LIB_LINK_FLAGS += -stdlib=libc++
 endif
 
 CPP11_FLAGS = -std=c++11 -D_TBB_CPP0X
diff --git a/build/macos.icc.inc b/build/macos.icc.inc
index 8ce49df..bed86c5 100644
--- a/build/macos.icc.inc
+++ b/build/macos.icc.inc
@@ -70,6 +70,13 @@ ifneq (00,$(lambdas)$(cpp0x))
 	CPLUS_FLAGS += $(CPP11_FLAGS)
 endif
 
+# ICC 14.0 and higher support clang environment
+ifneq (,$(shell icc -dumpversion | egrep  "^1[4-9]\."))
+    ifeq (libc++,$(stdlib))
+        CPLUS_FLAGS    += -use-clang-env -stdlib=libc++
+        LIB_LINK_FLAGS += -use-clang-env -stdlib=libc++
+    endif
+endif
 #------------------------------------------------------------------------------
 # Setting assembler data.
 #------------------------------------------------------------------------------
diff --git a/build/macos.inc b/build/macos.inc
index 3a772e5..6123013 100644
--- a/build/macos.inc
+++ b/build/macos.inc
@@ -59,12 +59,13 @@ ifeq (ios,$(target))
   ifneq (armv7, $(arch))
     $(error $(arch) not supported for target 'ios')
   endif
-  SDKROOT?=$(shell xcodebuild -sdk -version | grep -o -E '/.*SDKs/iPhoneOS.*' 2>/dev/null)
+  export SDKROOT?=$(shell xcodebuild -sdk -version | grep -o -E '/.*SDKs/iPhoneOS.*' 2>/dev/null)
   ifeq (,$(SDKROOT))
     $(error iOS SDK not found)
   endif
   # next, use a single compiler include file for both iOS* and OS X* builds.
   override target:=macos
+  export target
 endif
 
 ifndef runtime
@@ -97,6 +98,7 @@ endif
 
 OBJ=o
 DLL=dylib
+MALLOC_DLL?=$(DLL)
 LIBEXT=dylib
 
 def_prefix = $(if $(findstring 64,$(arch)),mac64,mac32)
@@ -108,7 +110,7 @@ TBB.LIB = $(TBB.DLL)
 LINK_TBB.LIB = $(TBB.LIB)
 
 MALLOC.DEF = $(MALLOC_ROOT)/$(def_prefix)-tbbmalloc-export.def
-MALLOC.DLL = libtbbmalloc$(DEBUG_SUFFIX).$(DLL)
+MALLOC.DLL = libtbbmalloc$(DEBUG_SUFFIX).$(MALLOC_DLL)
 MALLOC.LIB = $(MALLOC.DLL)
 LINK_MALLOC.LIB = $(MALLOC.LIB)
 
diff --git a/build/mic.linux.inc b/build/mic.linux.inc
index 245c6db..6cf5dbb 100644
--- a/build/mic.linux.inc
+++ b/build/mic.linux.inc
@@ -30,7 +30,7 @@ endif
 
 ifneq ($(BUILDING_PHASE),1)
   # The same build prefix should be used in offload.inc
-  tbb_build_prefix?=mic_icc$(CPF_SUFFIX)
+  export tbb_build_prefix?=mic_icc$(CPF_SUFFIX)
 endif
 
 MAKE_VERSIONS=sh $(tbb_root)/build/version_info_linux.sh $(CPLUS) $(CPLUS_FLAGS) $(INCLUDES) >version_string.ver
diff --git a/build/mic.linux.launcher.sh b/build/mic.linux.launcher.sh
index 4709b98..319d598 100644
--- a/build/mic.linux.launcher.sh
+++ b/build/mic.linux.launcher.sh
@@ -27,11 +27,12 @@
 # the GNU General Public License.
 
 # Usage:
-# mic.linux.launcher.sh [-v] [-s] [-r <repeats>] [-u] [-l <library>] <executable> <arg1> <arg2> <argN>
+# mic.linux.launcher.sh [-v] [-q] [-s] [-r <repeats>] [-u] [-l <library>] <executable> <arg1> <arg2> <argN>
 #         where: -v enables verbose output
-#         where: -s enables stress testing unless ctrl-c is pressed
+#         where: -q enables quiet mode
+#         where: -s runs the test in stress mode (until non-zero exit code or ctrl-c pressed)
 #         where: -r <repeats> specifies number of times to repeat execution
-#         where: -u is ignored
+#         where: -u limits stack size
 #         where: -l <library> specifies the library name to be assigned to LD_PRELOAD
 #
 # Libs and executable necessary for testing should be present in the current directory before running.
@@ -40,18 +41,27 @@
 trap 'echo Error at line $LINENO while executing "$BASH_COMMAND"' ERR #
 trap 'echo -e "\n*** Interrupted ***" && exit 1' SIGINT SIGQUIT #
 # Process the optional arguments if present
-if [ "x$1" = "x-v" ]; then shift 1; else SUPPRESS='>/dev/null'; fi #
-if [ "x$1" = "x-s" ]; then shift 1; echo Doing stress testing. Press Ctrl-C to terminate
-    run_prefix+='rep() { while :; do $*; done; }; rep '; fi #
-if [ "x$1" = "x-r" ]; then #
-    run_prefix+="rep() { for i in \$(seq 1 $2); do echo \$i of $2:; \$*; done; }; rep " #
-    shift 2; fi #
-[ "x$1" = "x-u" ] && shift 1 #
-if [ "x$1" = "x-l" ]; then { #
-    ldd_list+="$2 "#
-    run_prefix+=" LD_PRELOAD=$2" #
-    shift 2 #
-}; fi #
+while getopts  "qvsr:ul:" flag #
+do case $flag in #
+    s )  # Stress testing mode
+         echo Doing stress testing. Press Ctrl-C to terminate
+         run_prefix+='rep() { while $*; do :; done; }; rep ' ;; #
+    r )  # Repeats test n times
+         run_prefix+="rep() { for i in \$(seq 1 $OPTARG); do echo \$i of $OPTARG:; \$*; done; }; rep " ;; #
+    l )  # Additional library
+         ldd_list+="$OPTARG " #
+         run_prefix+=" LD_PRELOAD=$OPTARG" ;; #
+    u )  # Set stack limit
+         run_prefix="ulimit -s 10240; $run_prefix" ;; # 
+    q )  # Quiet mode: removes 'done' but prefixes any other output with the test name
+         SUPPRESS='>/dev/null' #
+         verbose=1 ;; # TODO: implement a better quiet mode
+    v )  # Verbose mode
+         verbose=1 ;; #
+esac done #
+shift `expr $OPTIND - 1` #
+[ $verbose ] || SUPPRESS='>/dev/null' #
+
 # Collect the executable name
 fexename="$1" #
 exename=`basename $1` #
@@ -100,6 +110,7 @@ for fullname in "$@"; do if [ -r $fullname ]; then { #
 }; fi; done #
 #
 args=$* #
+[ $verbose ] && echo Running ./$exename $args #
 # Run the test on the target device
 kill_interrupt() { #
 echo -e "\n*** Killing remote $exename ***" && $RSH "killall $exename" #
diff --git a/build/mic.offload.inc b/build/mic.offload.inc
index 636887b..c934fc9 100644
--- a/build/mic.offload.inc
+++ b/build/mic.offload.inc
@@ -149,8 +149,8 @@ ifeq ($(TEST_COMPILATION),1)
   # Need to set anything because WARNING_AS_ERROR_KEY should not be empty.
   # Treat #2426 as a warning. Print errors only.
   tbb_strict=0
-  override WARNING_AS_ERROR_KEY = Warning as error
-  override WARNING_KEY = -diag-warning 2426 -w0
+  WARNING_AS_ERROR_KEY = Warning as error
+  WARNING_KEY = -diag-warning 2426 -w0
   # Enable mic-specific stuff.
   CXX_MIC_STUFF = -offload-attribute-target=mic -D__TBB_MIC_OFFLOAD=1 \
           -offload-option,mic,compiler,"-D__TBB_MIC_OFFLOAD=1 -DHARNESS_INCOMPLETE_SOURCES=1 -D__TBB_MIC_NATIVE -DTBB_USE_EXCEPTIONS=0"
diff --git a/build/test_launcher.bat b/build/test_launcher.bat
index 46d60a5..06903c4 100644
--- a/build/test_launcher.bat
+++ b/build/test_launcher.bat
@@ -28,8 +28,32 @@ REM the GNU General Public License.
 REM
 
 set cmd_line=
+if DEFINED run_prefix set cmd_line=%run_prefix%
 :while
 if NOT "%1"=="" (
+    REM Verbose mode
+    if "%1"=="-v" (
+        set verbose=yes
+        GOTO continue
+    )
+    REM Silent mode of 'make' needs extra support for associating test
+    REM output with the test name. Verbose mode is the simplest way to do that
+    if "%1"=="-q" (
+        set verbose=yes
+        GOTO continue
+    )
+    REM Run in stress mode
+    if "%1"=="-s" (
+        echo Doing stress testing. Press Ctrl-C to terminate
+        set stress=yes
+        GOTO continue
+    )
+    REM Repeat execution specified number of times
+    if "%1"=="-r" (
+        set repeat=%2
+        SHIFT
+        GOTO continue
+    )
     REM no LD_PRELOAD under Windows
     REM but run the test to check "#pragma comment" construction
     if "%1"=="-l" (
@@ -47,5 +71,12 @@ if NOT "%1"=="" (
     SHIFT
     GOTO while
 )
-
-%cmd_line%
+set cmd_line=%cmd_line:./=.\%
+if DEFINED verbose echo Running %cmd_line%
+if DEFINED stress set cmd_line=%cmd_line% ^& IF NOT ERRORLEVEL 1 GOTO stress
+:stress
+if DEFINED repeat (
+    for /L %%i in (1,1,%repeat%) do echo %%i of %repeat%: & %cmd_line%
+) else (
+    %cmd_line%
+)
diff --git a/build/test_launcher.sh b/build/test_launcher.sh
index 9788ddb..52552f0 100644
--- a/build/test_launcher.sh
+++ b/build/test_launcher.sh
@@ -26,8 +26,22 @@
 # invalidate any other reasons why the executable file might be covered by
 # the GNU General Public License.
 
-while getopts  "ul:" flag #
+# Usage:
+# test_launcher.sh [-v] [-q] [-s] [-r <repeats>] [-u] [-l <library>] <executable> <arg1> <arg2> <argN>
+#         where: -v enables verbose output
+#         where: -q enables quiet mode
+#         where: -s runs the test in stress mode (until non-zero exit code or ctrl-c pressed)
+#         where: -r <repeats> specifies number of times to repeat execution
+#         where: -u limits stack size
+#         where: -l <library> specifies the library name to be assigned to LD_PRELOAD
+
+while getopts  "qvsr:ul:" flag #
 do case $flag in #
+    s )  # Stress testing mode
+         run_prefix="$run_prefix stressed" ;; #
+    r )  # Repeats test n times
+         repeat=$OPTARG #
+         run_prefix="$run_prefix repeated" ;; #
     l )  if [ `uname` != 'Linux' ] ; then #
              echo 'skip' #
              exit #
@@ -35,6 +49,10 @@ do case $flag in #
          LD_PRELOAD=$OPTARG ;; #
     u )  # Set stack limit
          ulimit -s 10240 ;; # 
+    q )  # Quiet mode: removes 'done' but prefixes any other output with the test name
+         OUTPUT='2>&1 | sed -e "s/done//;/^[[:space:]]*$/d;s!^!$1: !"' ;; #
+    v )  # Verbose mode
+         verbose=1 ;; #
 esac done #
 shift `expr $OPTIND - 1` #
 if [ $OFFLOAD_EXECUTION ] ; then #
@@ -49,12 +67,21 @@ if [ $OFFLOAD_EXECUTION ] ; then #
     LD_LIBRARY_PATH=$TMPDIR_MIC:$LD_LIBRARY_PATH #
     export LD_LIBRARY_PATH #
 fi #
+stressed() { echo Doing stress testing. Press Ctrl-C to terminate
+    while :; do $*; done;#
+} #
+repeated() { #
+    for i in $(seq 1 $repeat); do echo $i of $repeat:; $*; done;#
+} #
 # Run the command line passed via parameters
+[ $verbose ] && echo Running $run_prefix $* #
 export LD_PRELOAD #
-./$* #
-exitcode=$? #
+exec 4>&1 # extracting exit code of the first command in pipeline needs duplicated stdout
+# custom redirection needs eval, otherwise shell cannot parse it
+err=`eval '( $run_prefix $* || echo \$? >&3; )' ${OUTPUT} 3>&1 >&4` #
+[ -z $err ] || echo $1: exited with error $err
 if [ $OFFLOAD_EXECUTION ] ; then #
     sudo ssh $MIC_CARD rm -fr "$TMPDIR_MIC" >/dev/null 2>/dev/null #
     rm -fr "$TMPDIR_HOST" >/dev/null 2>/dev/null #
 fi #
-exit ${exitcode} #
+exit $err #
diff --git a/build/vsproject/tbb.vcproj b/build/vsproject/tbb.vcproj
index ee3d37b..387432f 100644
--- a/build/vsproject/tbb.vcproj
+++ b/build/vsproject/tbb.vcproj
@@ -175,45 +175,47 @@
 		<Filter Name="Source Files" Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
 			<File RelativePath="..\..\src\tbb\ia32-masm\atomic_support.asm">
 				<FileConfiguration Name="Debug|Win32">
-					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
+					<Tool Name="MASM" AdditionalOptions="/coff /Zi" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Debug-MT|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff /Zi" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff" UseSafeExceptionHandlers="true"/>
 				</FileConfiguration>
 				<FileConfiguration Name="Debug|x64" ExcludedFromBuild="true">
-					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
 				</FileConfiguration>
 				<FileConfiguration Name="Release|x64" ExcludedFromBuild="true">
-					<Tool Name="MASM"/>
-				</FileConfiguration>
-				<FileConfiguration Name="Debug-MT|Win32">
-					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
 				</FileConfiguration>
 				<FileConfiguration Name="Debug-MT|x64" ExcludedFromBuild="true">
-					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
 				</FileConfiguration>
 				<FileConfiguration Name="Release-MT|x64" ExcludedFromBuild="true">
-					<Tool Name="MASM"/>
 				</FileConfiguration>
 			</File>
 			<File RelativePath="..\..\src\tbb\intel64-masm\atomic_support.asm">
 				<FileConfiguration Name="Debug|Win32" ExcludedFromBuild="true">
 				</FileConfiguration>
+				<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Debug-MT|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
 				<FileConfiguration Name="Debug|x64">
 					<Tool Name="VCCustomBuildTool" Description="building atomic_support.obj" CommandLine="ml64 /Fo"intel64\Debug\atomic_support.obj" /DUSE_FRAME_POINTER /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/atomic_support.asm
 " Outputs="intel64\Debug\atomic_support.obj"/>
 				</FileConfiguration>
-				<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true">
-				</FileConfiguration>
 				<FileConfiguration Name="Release|x64">
 					<Tool Name="VCCustomBuildTool" Description="building atomic_support.obj" CommandLine="ml64 /Fo"intel64\Release\atomic_support.obj"  /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/atomic_support.asm
 " Outputs="intel64\Release\atomic_support.obj"/>
 				</FileConfiguration>
-				<FileConfiguration Name="Debug-MT|Win32" ExcludedFromBuild="true">
-				</FileConfiguration>
 				<FileConfiguration Name="Debug-MT|x64">
 					<Tool Name="VCCustomBuildTool" Description="building atomic_support.obj" CommandLine="ml64 /Fo"intel64\Debug-MT\atomic_support.obj" /DUSE_FRAME_POINTER /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/atomic_support.asm
 " Outputs="intel64\Debug-MT\atomic_support.obj"/>
 				</FileConfiguration>
-				<FileConfiguration Name="Release-MT|Win32" ExcludedFromBuild="true">
-				</FileConfiguration>
 				<FileConfiguration Name="Release-MT|x64">
 					<Tool Name="VCCustomBuildTool" Description="building atomic_support.obj" CommandLine="ml64 /Fo"intel64\Release-MT\atomic_support.obj"  /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/atomic_support.asm
 " Outputs="intel64\Release-MT\atomic_support.obj"/>
@@ -222,42 +224,96 @@
 			<File RelativePath="..\..\src\tbb\intel64-masm\intel64_misc.asm">
 				<FileConfiguration Name="Debug|Win32" ExcludedFromBuild="true">
 				</FileConfiguration>
+				<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Debug-MT|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
 				<FileConfiguration Name="Debug|x64">
 					<Tool Name="VCCustomBuildTool" Description="building intel64_misc.obj" CommandLine="ml64 /Fo"intel64\Debug\intel64_misc.obj" /DUSE_FRAME_POINTER /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/intel64_misc.asm
 " Outputs="intel64\Debug\intel64_misc.obj"/>
 				</FileConfiguration>
-				<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true">
-				</FileConfiguration>
 				<FileConfiguration Name="Release|x64">
 					<Tool Name="VCCustomBuildTool" Description="building intel64_misc.obj" CommandLine="ml64 /Fo"intel64\Release\intel64_misc.obj"  /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/intel64_misc.asm
 " Outputs="intel64\Release\intel64_misc.obj"/>
 				</FileConfiguration>
-				<FileConfiguration Name="Debug-MT|Win32" ExcludedFromBuild="true">
-				</FileConfiguration>
 				<FileConfiguration Name="Debug-MT|x64">
 					<Tool Name="VCCustomBuildTool" Description="building intel64_misc.obj" CommandLine="ml64 /Fo"intel64\Debug-MT\intel64_misc.obj" /DUSE_FRAME_POINTER /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/intel64_misc.asm
 " Outputs="intel64\Debug-MT\intel64_misc.obj"/>
 				</FileConfiguration>
-				<FileConfiguration Name="Release-MT|Win32" ExcludedFromBuild="true">
-				</FileConfiguration>
 				<FileConfiguration Name="Release-MT|x64">
 					<Tool Name="VCCustomBuildTool" Description="building intel64_misc.obj" CommandLine="ml64 /Fo"intel64\Release-MT\intel64_misc.obj"  /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/intel64_misc.asm
 " Outputs="intel64\Release-MT\intel64_misc.obj"/>
 				</FileConfiguration>
 			</File>
-			<File RelativePath="..\..\src\tbb\ia32-masm\lock_byte.asm">
+            <File RelativePath="..\..\src\tbb\ia32-masm\itsx.asm">
 				<FileConfiguration Name="Debug|Win32">
-					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
+					<Tool Name="MASM" AdditionalOptions="/coff /Zi" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Debug-MT|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff /Zi" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|x64" ExcludedFromBuild="true">
 				</FileConfiguration>
 				<FileConfiguration Name="Debug|x64" ExcludedFromBuild="true">
-					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
 				</FileConfiguration>
 				<FileConfiguration Name="Release|x64" ExcludedFromBuild="true">
-					<Tool Name="MASM"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Debug-MT|x64" ExcludedFromBuild="true">
+				</FileConfiguration>
+			</File>
+            <File RelativePath="..\..\src\tbb\intel64-masm\itsx.asm">
+				<FileConfiguration Name="Debug|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Debug-MT|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|Win32" ExcludedFromBuild="true">
+				</FileConfiguration>
+				<FileConfiguration Name="Debug|x64">
+					<Tool Name="VCCustomBuildTool" Description="building itsx.obj" CommandLine="ml64 /Fo"intel64\Debug\itsx.obj" /DUSE_FRAME_POINTER /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/itsx.asm
+" Outputs="intel64\Debug\itsx.obj"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release|x64">
+					<Tool Name="VCCustomBuildTool" Description="building itsx.obj" CommandLine="ml64 /Fo"intel64\Release\itsx.obj"  /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/itsx.asm
+" Outputs="intel64\Release\itsx.obj"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Debug-MT|x64">
+					<Tool Name="VCCustomBuildTool" Description="building itsx.obj" CommandLine="ml64 /Fo"intel64\Debug-MT\itsx.obj" /DUSE_FRAME_POINTER /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/itsx.asm
+" Outputs="intel64\Debug-MT\itsx.obj"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|x64">
+					<Tool Name="VCCustomBuildTool" Description="building itsx.obj" CommandLine="ml64 /Fo"intel64\Release-MT\itsx.obj"  /DEM64T=1 /c /Zi ../../src/tbb/intel64-masm/itsx.asm
+" Outputs="intel64\Release-MT\itsx.obj"/>
+				</FileConfiguration>
+			</File>
+			<File RelativePath="..\..\src\tbb\ia32-masm\lock_byte.asm">
+				<FileConfiguration Name="Debug|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff /Zi" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff" UseSafeExceptionHandlers="true"/>
 				</FileConfiguration>
 				<FileConfiguration Name="Debug-MT|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff /Zi" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Release-MT|Win32">
+					<Tool Name="MASM" AdditionalOptions="/coff" UseSafeExceptionHandlers="true"/>
+				</FileConfiguration>
+				<FileConfiguration Name="Debug|x64" ExcludedFromBuild="true">
 					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
 				</FileConfiguration>
+				<FileConfiguration Name="Release|x64" ExcludedFromBuild="true">
+					<Tool Name="MASM"/>
+				</FileConfiguration>
 				<FileConfiguration Name="Debug-MT|x64" ExcludedFromBuild="true">
 					<Tool Name="MASM" AdditionalOptions="/coff /Zi"/>
 				</FileConfiguration>
diff --git a/build/windows.cl.inc b/build/windows.cl.inc
index 0e4dd40..0918d5e 100644
--- a/build/windows.cl.inc
+++ b/build/windows.cl.inc
@@ -115,7 +115,7 @@ ASSEMBLY_SOURCE=$(arch)-masm
 ifeq (intel64,$(arch))
     ASM=ml64 /nologo
     ASM_FLAGS += /DEM64T=1 /c /Zi
-    TBB_ASM.OBJ = atomic_support.obj intel64_misc.obj
+    TBB_ASM.OBJ = atomic_support.obj intel64_misc.obj itsx.obj
     MALLOC_ASM.OBJ = atomic_support.obj
 else
 ifeq (armv7,$(arch))
@@ -124,7 +124,7 @@ ifeq (armv7,$(arch))
 else
     ASM=ml /nologo
     ASM_FLAGS += /c /coff /Zi /safeseh
-    TBB_ASM.OBJ = atomic_support.obj lock_byte.obj
+    TBB_ASM.OBJ = atomic_support.obj lock_byte.obj itsx.obj
 endif
 endif
 #------------------------------------------------------------------------------
diff --git a/build/windows.icl.inc b/build/windows.icl.inc
index ae12dc7..5110b21 100644
--- a/build/windows.icl.inc
+++ b/build/windows.icl.inc
@@ -148,12 +148,12 @@ ASSEMBLY_SOURCE=$(arch)-masm
 ifeq (intel64,$(arch))
     ASM=ml64 /nologo
     ASM_FLAGS += /DEM64T=1 /c /Zi
-    TBB_ASM.OBJ = atomic_support.obj intel64_misc.obj
+    TBB_ASM.OBJ = atomic_support.obj intel64_misc.obj itsx.obj
     MALLOC_ASM.OBJ = atomic_support.obj
 else
     ASM=ml /nologo
     ASM_FLAGS += /c /coff /Zi /safeseh
-    TBB_ASM.OBJ = atomic_support.obj lock_byte.obj
+    TBB_ASM.OBJ = atomic_support.obj lock_byte.obj itsx.obj
 endif
 #------------------------------------------------------------------------------
 # End of setting assembler data.
diff --git a/doc/Release_Notes.txt b/doc/Release_Notes.txt
index 985e362..0586105 100644
--- a/doc/Release_Notes.txt
+++ b/doc/Release_Notes.txt
@@ -1,6 +1,6 @@
 ------------------------------------------------------------------------
 Intel(R) Threading Building Blocks - Release Notes
-		  Version 4.1
+		  Version 4.2
 ------------------------------------------------------------------------
 
 
@@ -52,21 +52,20 @@ Software - Supported Operating Systems
 
     Systems with Microsoft* Windows* operating systems
 	Microsoft* Windows* 8
-	Microsoft* Windows* 7 SP1 
+	Microsoft* Windows* 7 SP1
 	Microsoft* Windows* Server 2012
 	Microsoft* Windows* Server 2008 SP2
 	Microsoft* Windows* Server 2008 R2 SP1
 	Microsoft* Windows* XP Professional SP3
     Systems with Linux* operating systems
-	Red Hat* Enterprise Linux* 6, 5
-	Fedora* 17
-	Debian* 6.0
-	Ubuntu* 11.10, 12.04
+	Red Hat* Enterprise Linux* 5, 6
+	Fedora* 18, 19
+	Debian* 6.0, 7
+	Ubuntu* 12.04, 13.04
 	SuSE* Linux* Enterprise Server 10, 11SP2
-	Pardus* 2011.2 (x64 only)
 	Intel(R) Cluster Ready
     Systems with OS X* operating systems
-	OS X* 10.7.5 or higher
+	OS X* 10.8 or higher
 
 Software - Supported Compilers
 
diff --git a/doc/html/a00019.html b/doc/html/a00019.html
index dc39482..750c59a 100644
--- a/doc/html/a00019.html
+++ b/doc/html/a00019.html
@@ -30,6 +30,7 @@
   <tr class="memlist"><td><a class="el" href="a00281.html#46b9896317662c3cfa3c876ad7592a7c">concurrent_hash_map</a>(size_type n, const allocator_type &a=allocator_type())</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00281.html#6fb14710893308fb47aaeee55ee30dc3">concurrent_hash_map</a>(const concurrent_hash_map &table, const allocator_type &a=allocator_type())</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00281.html#83c40f2053f208861b90390e12a36436">concurrent_hash_map</a>(I first, I last, const allocator_type &a=allocator_type())</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
+  <tr class="memlist"><td><a class="el" href="a00281.html#3bb2e3526ed741f0689c00d6cd022431">concurrent_hash_map</a>(const std::initializer_list< value_type > &il, const allocator_type &a=allocator_type())</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>const_accessor</b> (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [friend]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>const_iterator</b> typedef (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>const_pointer</b> typedef (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td></td></tr>
@@ -71,6 +72,7 @@
   <tr bgcolor="#f0f0f0"><td><b>my_hash_compare</b> (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [protected]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>node_allocator_type</b> typedef (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [protected]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">operator=</a>(const concurrent_hash_map &table)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
+  <tr class="memlist"><td><a class="el" href="a00281.html#f765500afa7f55480f3991cfbe826b28">operator=</a>(const std::initializer_list< value_type > &il)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>pointer</b> typedef (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>range</b>(size_type grainsize=1) (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>range</b>(size_type grainsize=1) const  (defined in <a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a>)</td><td><a class="el" href="a00281.html">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a></td><td><code> [inline]</code></td></tr>
diff --git a/doc/html/a00127.html b/doc/html/a00127.html
index 38ec61b..f47abc8 100644
--- a/doc/html/a00127.html
+++ b/doc/html/a00127.html
@@ -21,7 +21,7 @@
     <li><a href="functions.html"><span>Class Members</span></a></li>
   </ul></div>
 <h1>tbb::spin_mutex Member List</h1>This is the complete list of members for <a class="el" href="a00343.html">tbb::spin_mutex</a>, including all inherited members.<p><table>
-  <tr bgcolor="#f0f0f0"><td><b>internal_construct</b>() (defined in <a class="el" href="a00343.html">tbb::spin_mutex</a>)</td><td><a class="el" href="a00343.html">tbb::spin_mutex</a></td><td></td></tr>
+  <tr class="memlist"><td><a class="el" href="a00343.html#4b3fa21632815f8fab2fd6c67ec0d48c">internal_construct</a>()</td><td><a class="el" href="a00343.html">tbb::spin_mutex</a></td><td></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>is_fair_mutex</b> (defined in <a class="el" href="a00343.html">tbb::spin_mutex</a>)</td><td><a class="el" href="a00343.html">tbb::spin_mutex</a></td><td><code> [static]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>is_recursive_mutex</b> (defined in <a class="el" href="a00343.html">tbb::spin_mutex</a>)</td><td><a class="el" href="a00343.html">tbb::spin_mutex</a></td><td><code> [static]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>is_rw_mutex</b> (defined in <a class="el" href="a00343.html">tbb::spin_mutex</a>)</td><td><a class="el" href="a00343.html">tbb::spin_mutex</a></td><td><code> [static]</code></td></tr>
diff --git a/doc/html/a00281.html b/doc/html/a00281.html
index b3234e6..66ba2d2 100644
--- a/doc/html/a00281.html
+++ b/doc/html/a00281.html
@@ -99,10 +99,18 @@ template<typename I> </td></tr>
 <tr><td class="memTemplItemLeft" nowrap align="right" valign="top"> </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="a00281.html#83c40f2053f208861b90390e12a36436">concurrent_hash_map</a> (I first, I last, const allocator_type &a=allocator_type())</td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Construction with copying iteration range and given allocator instance. <br></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="3bb2e3526ed741f0689c00d6cd022431"></a><!-- doxytag: member="tbb::interface5::concurrent_hash_map::concurrent_hash_map" ref="3bb2e3526ed741f0689c00d6cd022431" args="(const std::initializer_list< value_type > &il, const allocator_type &a=allocator_type())" -->
+ </td><td class="memItemRight" valign="bottom"><a class="el" href="a00281.html#3bb2e3526ed741f0689c00d6cd022431">concurrent_hash_map</a> (const std::initializer_list< value_type > &il, const allocator_type &a=allocator_type())</td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">Construction with copying the contents of an initializer list and given allocator instance. <br></td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="088d1aaccc816884a49e38f7065622c8"></a><!-- doxytag: member="tbb::interface5::concurrent_hash_map::operator=" ref="088d1aaccc816884a49e38f7065622c8" args="(const concurrent_hash_map &table)" -->
 <a class="el" href="a00281.html">concurrent_hash_map</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">operator=</a> (const <a class="el" href="a00281.html">concurrent_hash_map</a> &table)</td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Assignment. <br></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="f765500afa7f55480f3991cfbe826b28"></a><!-- doxytag: member="tbb::interface5::concurrent_hash_map::operator=" ref="f765500afa7f55480f3991cfbe826b28" args="(const std::initializer_list< value_type > &il)" -->
+<a class="el" href="a00281.html">concurrent_hash_map</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="a00281.html#f765500afa7f55480f3991cfbe826b28">operator=</a> (const std::initializer_list< value_type > &il)</td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">Assignment. <br></td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top">void </td><td class="memItemRight" valign="bottom"><a class="el" href="a00281.html#94758113d8993cfe5afdf2d63a728869">rehash</a> (size_type n=0)</td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Rehashes and optionally resizes the whole table.  <a href="#94758113d8993cfe5afdf2d63a728869"></a><br></td></tr>
diff --git a/doc/html/a00343.html b/doc/html/a00343.html
index e779206..a63b25a 100644
--- a/doc/html/a00343.html
+++ b/doc/html/a00343.html
@@ -37,8 +37,9 @@
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Construct unacquired lock.  <a href="#3d8fb44644fd8d41ada1fbeba7409be3"></a><br></td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="4b3fa21632815f8fab2fd6c67ec0d48c"></a><!-- doxytag: member="tbb::spin_mutex::internal_construct" ref="4b3fa21632815f8fab2fd6c67ec0d48c" args="()" -->
-void __TBB_EXPORTED_METHOD </td><td class="memItemRight" valign="bottom"><b>internal_construct</b> ()</td></tr>
+void __TBB_EXPORTED_METHOD </td><td class="memItemRight" valign="bottom"><a class="el" href="a00343.html#4b3fa21632815f8fab2fd6c67ec0d48c">internal_construct</a> ()</td></tr>
 
+<tr><td class="mdescLeft"> </td><td class="mdescRight">Internal constructor with ITT instrumentation. <br></td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="4f748989e19b6045e3a2d2ee73626a28"></a><!-- doxytag: member="tbb::spin_mutex::lock" ref="4f748989e19b6045e3a2d2ee73626a28" args="()" -->
 void </td><td class="memItemRight" valign="bottom"><a class="el" href="a00343.html#4f748989e19b6045e3a2d2ee73626a28">lock</a> ()</td></tr>
 
diff --git a/doc/html/a00405.html b/doc/html/a00405.html
index 5a39239..24e622b 100644
--- a/doc/html/a00405.html
+++ b/doc/html/a00405.html
@@ -47,13 +47,32 @@ typedef void *(*) </td><td class="memItemRight" valign="bottom"><b>rml::raw
 typedef int(*) </td><td class="memItemRight" valign="bottom"><b>rml::rawFreeType</b> (intptr_t pool_id, void *raw_ptr, size_t raw_bytes)</td></tr>
 
 <tr><td colspan="2"><br><h2>Enumerations</h2></td></tr>
-<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>AllocationModeParam</b> { <b>USE_HUGE_PAGES</b>
+<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>ScalableAllocationResult</b> { <br>
+  <b>TBBMALLOC_OK</b>, 
+<b>TBBMALLOC_INVALID_PARAM</b>, 
+<b>TBBMALLOC_UNSUPPORTED</b>, 
+<b>TBBMALLOC_NO_MEMORY</b>, 
+<br>
+  <b>TBBMALLOC_NO_EFFECT</b>
+<br>
  }</td></tr>
 
-<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>MemPoolError</b> { <b>POOL_OK</b>, 
-<b>INVALID_POLICY</b>, 
-<b>UNSUPPORTED_POLICY</b>, 
-<b>NO_MEMORY</b>
+<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>AllocationModeParam</b> { <b>TBBMALLOC_USE_HUGE_PAGES</b>, 
+<b>USE_HUGE_PAGES</b> =  TBBMALLOC_USE_HUGE_PAGES
+ }</td></tr>
+
+<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>ScalableAllocationCmd</b> { <b>TBBMALLOC_CLEAN_ALL_BUFFERS</b>, 
+<b>TBBMALLOC_CLEAN_THREAD_BUFFERS</b>
+ }</td></tr>
+
+<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>MemPoolError</b> { <br>
+  <b>POOL_OK</b> =  TBBMALLOC_OK, 
+<b>INVALID_POLICY</b> =  TBBMALLOC_INVALID_PARAM, 
+<b>UNSUPPORTED_POLICY</b> =  TBBMALLOC_UNSUPPORTED, 
+<b>NO_MEMORY</b> =  TBBMALLOC_NO_MEMORY, 
+<br>
+  <b>NO_EFFECT</b> =  TBBMALLOC_NO_EFFECT
+<br>
  }</td></tr>
 
 <tr><td colspan="2"><br><h2>Functions</h2></td></tr>
@@ -77,6 +96,8 @@ typedef int(*) </td><td class="memItemRight" valign="bottom"><b>rml::rawFre
 
 <tr><td class="memItemLeft" nowrap align="right" valign="top">int __TBB_EXPORTED_FUNC </td><td class="memItemRight" valign="bottom"><a class="el" href="a00443.html#gb9ee52ffc5400f15c3d8af8c7613c05a">scalable_allocation_mode</a> (int param, intptr_t value)</td></tr>
 
+<tr><td class="memItemLeft" nowrap align="right" valign="top">int __TBB_EXPORTED_FUNC </td><td class="memItemRight" valign="bottom"><a class="el" href="a00443.html#gcdbd064aec22571ec84e906166a831a2">scalable_allocation_command</a> (int cmd, void *param)</td></tr>
+
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="7be7b591205c8bf497261d40c0304ab9"></a><!-- doxytag: member="scalable_allocator.h::pool_create_v1" ref="7be7b591205c8bf497261d40c0304ab9" args="(intptr_t pool_id, const MemPoolPolicy *policy, rml::MemoryPool **pool)" -->
 MemPoolError </td><td class="memItemRight" valign="bottom"><b>rml::pool_create_v1</b> (intptr_t pool_id, const MemPoolPolicy *policy, rml::MemoryPool **pool)</td></tr>
 
diff --git a/doc/html/a00425.html b/doc/html/a00425.html
index 4e2eceb..c016105 100644
--- a/doc/html/a00425.html
+++ b/doc/html/a00425.html
@@ -35,10 +35,14 @@ typedef void *(*) </td><td class="memItemRight" valign="bottom"><b>rawAlloc
 typedef int(*) </td><td class="memItemRight" valign="bottom"><b>rawFreeType</b> (intptr_t pool_id, void *raw_ptr, size_t raw_bytes)</td></tr>
 
 <tr><td colspan="2"><br><h2>Enumerations</h2></td></tr>
-<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>MemPoolError</b> { <b>POOL_OK</b>, 
-<b>INVALID_POLICY</b>, 
-<b>UNSUPPORTED_POLICY</b>, 
-<b>NO_MEMORY</b>
+<tr><td class="memItemLeft" nowrap align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><b>MemPoolError</b> { <br>
+  <b>POOL_OK</b> =  TBBMALLOC_OK, 
+<b>INVALID_POLICY</b> =  TBBMALLOC_INVALID_PARAM, 
+<b>UNSUPPORTED_POLICY</b> =  TBBMALLOC_UNSUPPORTED, 
+<b>NO_MEMORY</b> =  TBBMALLOC_NO_MEMORY, 
+<br>
+  <b>NO_EFFECT</b> =  TBBMALLOC_NO_EFFECT
+<br>
  }</td></tr>
 
 <tr><td colspan="2"><br><h2>Functions</h2></td></tr>
diff --git a/doc/html/a00428.html b/doc/html/a00428.html
index d72684f..4094825 100644
--- a/doc/html/a00428.html
+++ b/doc/html/a00428.html
@@ -451,6 +451,14 @@ template<typename T> </td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="bf4486f36580f7d4bf95aed2e052a380"></a><!-- doxytag: member="tbb::critical_section" ref="bf4486f36580f7d4bf95aed2e052a380" args="" -->
 typedef internal::critical_section_v4 </td><td class="memItemRight" valign="bottom"><b>critical_section</b></td></tr>
 
+<tr><td class="memItemLeft" nowrap align="right" valign="top">typedef interface7::internal::padded_mutex<<br>
+ interface7::internal::x86_eliding_mutex > </td><td class="memItemRight" valign="bottom"><a class="el" href="a00444.html#g645a9ea2a448def7e529d4ba0c6f39b8">speculative_spin_mutex</a></td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">A cross-platform spin mutex with speculative lock acquisition.  <a href="a00444.html#g645a9ea2a448def7e529d4ba0c6f39b8"></a><br></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="5e90a4e5887a152c32e2499b7e074f3c"></a><!-- doxytag: member="tbb::speculative_spin_mutex" ref="5e90a4e5887a152c32e2499b7e074f3c" args="" -->
+typedef interface7::internal::padded_mutex<<br>
+ <a class="el" href="a00343.html">spin_mutex</a> > </td><td class="memItemRight" valign="bottom"><b>speculative_spin_mutex</b></td></tr>
+
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="7fdc2b067a243747d4c3dfe6f3d28476"></a><!-- doxytag: member="tbb::spin_rw_mutex" ref="7fdc2b067a243747d4c3dfe6f3d28476" args="" -->
 typedef <a class="el" href="a00345.html">spin_rw_mutex_v3</a> </td><td class="memItemRight" valign="bottom"><b>spin_rw_mutex</b></td></tr>
 
diff --git a/doc/html/a00443.html b/doc/html/a00443.html
index 72629dd..15f5e2e 100644
--- a/doc/html/a00443.html
+++ b/doc/html/a00443.html
@@ -71,6 +71,8 @@
 
 <tr><td class="memItemLeft" nowrap align="right" valign="top">int __TBB_EXPORTED_FUNC </td><td class="memItemRight" valign="bottom"><a class="el" href="a00443.html#gb9ee52ffc5400f15c3d8af8c7613c05a">scalable_allocation_mode</a> (int param, intptr_t value)</td></tr>
 
+<tr><td class="memItemLeft" nowrap align="right" valign="top">int __TBB_EXPORTED_FUNC </td><td class="memItemRight" valign="bottom"><a class="el" href="a00443.html#gcdbd064aec22571ec84e906166a831a2">scalable_allocation_command</a> (int cmd, void *param)</td></tr>
+
 </table>
 <hr><h2>Function Documentation</h2>
 <a class="anchor" name="g65a20e812012f15ec7442d5b45d0cba5"></a><!-- doxytag: member="scalable_allocator.h::scalable_aligned_free" ref="g65a20e812012f15ec7442d5b45d0cba5" args="(void *ptr)" -->
@@ -157,6 +159,35 @@ The "_aligned_malloc" analogue.
 The "_aligned_realloc" analogue. 
 </div>
 </div><p>
+<a class="anchor" name="gcdbd064aec22571ec84e906166a831a2"></a><!-- doxytag: member="scalable_allocator.h::scalable_allocation_command" ref="gcdbd064aec22571ec84e906166a831a2" args="(int cmd, void *param)" -->
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int __TBB_EXPORTED_FUNC scalable_allocation_command           </td>
+          <td>(</td>
+          <td class="paramtype">int </td>
+          <td class="paramname"> <em>cmd</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">void * </td>
+          <td class="paramname"> <em>param</em></td><td> </td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td><td width="100%"></td>
+        </tr>
+      </table>
+</div>
+<div class="memdoc">
+
+<p>
+Call TBB allocator-specific commands. 
+</div>
+</div><p>
 <a class="anchor" name="gb9ee52ffc5400f15c3d8af8c7613c05a"></a><!-- doxytag: member="scalable_allocator.h::scalable_allocation_mode" ref="gb9ee52ffc5400f15c3d8af8c7613c05a" args="(int param, intptr_t value)" -->
 <div class="memitem">
 <div class="memproto">
diff --git a/doc/html/a00444.html b/doc/html/a00444.html
index 4f101ba..0af823a 100644
--- a/doc/html/a00444.html
+++ b/doc/html/a00444.html
@@ -47,7 +47,30 @@
 <tr><td class="memItemLeft" nowrap align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="a00345.html">tbb::spin_rw_mutex_v3</a></td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Fast, unfair, spinning reader-writer lock with backoff and writer-preference.  <a href="a00345.html#_details">More...</a><br></td></tr>
+<tr><td colspan="2"><br><h2>Typedefs</h2></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top">typedef interface7::internal::padded_mutex<<br>
+ interface7::internal::x86_eliding_mutex > </td><td class="memItemRight" valign="bottom"><a class="el" href="a00444.html#g645a9ea2a448def7e529d4ba0c6f39b8">tbb::speculative_spin_mutex</a></td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">A cross-platform spin mutex with speculative lock acquisition.  <a href="#g645a9ea2a448def7e529d4ba0c6f39b8"></a><br></td></tr>
 </table>
+<hr><h2>Typedef Documentation</h2>
+<a class="anchor" name="g645a9ea2a448def7e529d4ba0c6f39b8"></a><!-- doxytag: member="tbb::speculative_spin_mutex" ref="g645a9ea2a448def7e529d4ba0c6f39b8" args="" -->
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">typedef interface7::internal::padded_mutex<interface7::internal::x86_eliding_mutex> <a class="el" href="a00444.html#g645a9ea2a448def7e529d4ba0c6f39b8">tbb::speculative_spin_mutex</a>          </td>
+        </tr>
+      </table>
+</div>
+<div class="memdoc">
+
+<p>
+A cross-platform spin mutex with speculative lock acquisition. 
+<p>
+On platforms with proper HW support, this lock may speculatively execute its critical sections, using HW mechanisms to detect real data races and ensure atomicity of the critical sections. In particular, it uses Intel(R) Transactional Synchronization Extensions (Intel(R) TSX). Without such HW support, it behaves like a <a class="el" href="a00343.html">spin_mutex</a>. It should be used for locking short critical sections where the lock is contended but the data it protects are not. If zer [...]
+</div>
+</div><p>
 <hr>
 <p></p>
 Copyright © 2005-2013 Intel Corporation.  All Rights Reserved.
diff --git a/doc/html/functions_0x63.html b/doc/html/functions_0x63.html
index a6f0648..72819e6 100644
--- a/doc/html/functions_0x63.html
+++ b/doc/html/functions_0x63.html
@@ -72,7 +72,7 @@ Here is a list of all documented class members with links to the class documenta
 : <a class="el" href="a00273.html#3336ba9480fd6c43e158f9beb024c050">tbb::blocked_range3d< PageValue, RowValue, ColValue ></a>, <a class="el" href="a00272.html#392a46759af2c884957115771affa7f4">tbb::blocked_range2d< RowValue, ColValue ></a><li>compact()
 : <a class="el" href="a00288.html#1693d1da41b1a8235871be9c6633be35">tbb::concurrent_vector< T, A ></a><li>concurrent_bounded_queue()
 : <a class="el" href="a00280.html#a5e04dcd7db9fd9b583b4e7df832246a">tbb::concurrent_bounded_queue< T, A ></a><li>concurrent_hash_map()
-: <a class="el" href="a00281.html#83c40f2053f208861b90390e12a36436">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>concurrent_priority_queue()
+: <a class="el" href="a00281.html#3bb2e3526ed741f0689c00d6cd022431">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>concurrent_priority_queue()
 : <a class="el" href="a00285.html#c8b20e7430c5302936030bef59a562be">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a><li>concurrent_queue()
 : <a class="el" href="a00287.html#9102b897776bd2d9e908e6604ff16b5f">tbb::deprecated::concurrent_queue< T, A ></a>, <a class="el" href="a00286.html#8a6b98ea11a867db8ac868f0113ca429">tbb::strict_ppl::concurrent_queue< T, A ></a><li>concurrent_vector()
 : <a class="el" href="a00288.html#4450de83c5862ea4bcd9443fd7e67419">tbb::concurrent_vector< T, A ></a><li>const_accessor()
diff --git a/doc/html/functions_0x69.html b/doc/html/functions_0x69.html
index f8506d4..5e8c6f5 100644
--- a/doc/html/functions_0x69.html
+++ b/doc/html/functions_0x69.html
@@ -68,7 +68,8 @@ Here is a list of all documented class members with links to the class documenta
 : <a class="el" href="a00352.html#49a55352084fd44b8863d182e839e6dc">tbb::task_group_context</a><li>initialize()
 : <a class="el" href="a00355.html#d5ed214a8bb53b0466ed91ff4734b9a3">tbb::task_scheduler_init</a>, <a class="el" href="a00350.html#ff36aa5ec1305ca9931396fa608981da">tbb::interface6::task_arena</a><li>input_type
 : <a class="el" href="a00304.html#035196d3c9240ef041f528ebcde8baa7">tbb::flow::interface6::limiter_node< T ></a>, <a class="el" href="a00324.html#b2829b518979874ad3d2a939e14ae7bd">tbb::flow::interface6::priority_queue_node< T, Compare, A ></a>, <a class="el" href="a00340.html#af629f26832ff4e476e240637a78bc0c">tbb::flow::interface6::sequencer_node< T, A ></a>, <a class="el" href="a00325.html#1e71030845210b6c4c7380eb9e11a2ac">tbb::flow::interface6::queue_node< T, A &gt [...]
-: <a class="el" href="a00281.html#1dd37fad87e561151ba1e242ca94bcc1">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_copy()
+: <a class="el" href="a00281.html#1dd37fad87e561151ba1e242ca94bcc1">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_construct()
+: <a class="el" href="a00343.html#4b3fa21632815f8fab2fd6c67ec0d48c">tbb::spin_mutex</a><li>internal_copy()
 : <a class="el" href="a00281.html#72c9c9e9655fcf096f5f0ed9c8ba6669">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_equal_range()
 : <a class="el" href="a00281.html#8f5373b8e1864619d1ffcf3bf3f1f13d">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_fast_find()
 : <a class="el" href="a00281.html#2f76ed101a0ccc8875b846c2f747897e">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_forward_task()
diff --git a/doc/html/functions_0x6f.html b/doc/html/functions_0x6f.html
index 35d5500..4d26b0b 100644
--- a/doc/html/functions_0x6f.html
+++ b/doc/html/functions_0x6f.html
@@ -72,7 +72,7 @@ Here is a list of all documented class members with links to the class documenta
 : <a class="el" href="a00362.html#09dde78a4100800c11bb883d6204b586">tbb::tick_count</a>, <a class="el" href="a00363.html#fa509691e1d689830931e36edd274f76">tbb::tick_count::interval_t</a><li>operator-=()
 : <a class="el" href="a00363.html#35ff7eaf7c2031b4a991402ac9ecb940">tbb::tick_count::interval_t</a><li>operator->()
 : <a class="el" href="a00282.html#a807920cdffe3ec5c5e282b4d1ff92a2">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::accessor</a>, <a class="el" href="a00284.html#3d03a48ecb8cd9549bd8be64b09c9b0d">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::const_accessor</a><li>operator=()
-: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
+: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#f765500afa7f55480f3991cfbe826b28">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
 : <a class="el" href="a00288.html#c6fade5c732cc95274d1d8277ea619d1">tbb::concurrent_vector< T, A ></a><li>output_type
 : <a class="el" href="a00304.html#6e67fc480147c0b88a483b85db6457b0">tbb::flow::interface6::limiter_node< T ></a>, <a class="el" href="a00324.html#2cb099b590246b6bc93cc15e78c6ee5c">tbb::flow::interface6::priority_queue_node< T, Compare, A ></a>, <a class="el" href="a00340.html#ca026eaef70e35791c407323199031a7">tbb::flow::interface6::sequencer_node< T, A ></a>, <a class="el" href="a00325.html#25b5a53ab1f9a342644fa3759bc0b1ad">tbb::flow::interface6::queue_node< T, A &gt [...]
 <hr>
diff --git a/doc/html/functions_func_0x63.html b/doc/html/functions_func_0x63.html
index f0f86b1..a45f8fd 100644
--- a/doc/html/functions_func_0x63.html
+++ b/doc/html/functions_func_0x63.html
@@ -70,7 +70,7 @@
 : <a class="el" href="a00273.html#3336ba9480fd6c43e158f9beb024c050">tbb::blocked_range3d< PageValue, RowValue, ColValue ></a>, <a class="el" href="a00272.html#392a46759af2c884957115771affa7f4">tbb::blocked_range2d< RowValue, ColValue ></a><li>compact()
 : <a class="el" href="a00288.html#1693d1da41b1a8235871be9c6633be35">tbb::concurrent_vector< T, A ></a><li>concurrent_bounded_queue()
 : <a class="el" href="a00280.html#a5e04dcd7db9fd9b583b4e7df832246a">tbb::concurrent_bounded_queue< T, A ></a><li>concurrent_hash_map()
-: <a class="el" href="a00281.html#83c40f2053f208861b90390e12a36436">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>concurrent_priority_queue()
+: <a class="el" href="a00281.html#3bb2e3526ed741f0689c00d6cd022431">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>concurrent_priority_queue()
 : <a class="el" href="a00285.html#c8b20e7430c5302936030bef59a562be">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a><li>concurrent_queue()
 : <a class="el" href="a00287.html#9102b897776bd2d9e908e6604ff16b5f">tbb::deprecated::concurrent_queue< T, A ></a>, <a class="el" href="a00286.html#8a6b98ea11a867db8ac868f0113ca429">tbb::strict_ppl::concurrent_queue< T, A ></a><li>concurrent_vector()
 : <a class="el" href="a00288.html#4450de83c5862ea4bcd9443fd7e67419">tbb::concurrent_vector< T, A ></a><li>const_accessor()
diff --git a/doc/html/functions_func_0x69.html b/doc/html/functions_func_0x69.html
index e092979..3fdff27 100644
--- a/doc/html/functions_func_0x69.html
+++ b/doc/html/functions_func_0x69.html
@@ -65,7 +65,8 @@
 : <a class="el" href="a00300.html#a993b789d1e488e0c3929135beae560e">tbb::flow::interface6::graph</a><li>init()
 : <a class="el" href="a00352.html#49a55352084fd44b8863d182e839e6dc">tbb::task_group_context</a><li>initialize()
 : <a class="el" href="a00355.html#d5ed214a8bb53b0466ed91ff4734b9a3">tbb::task_scheduler_init</a>, <a class="el" href="a00350.html#ff36aa5ec1305ca9931396fa608981da">tbb::interface6::task_arena</a><li>insert()
-: <a class="el" href="a00281.html#1dd37fad87e561151ba1e242ca94bcc1">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_copy()
+: <a class="el" href="a00281.html#1dd37fad87e561151ba1e242ca94bcc1">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_construct()
+: <a class="el" href="a00343.html#4b3fa21632815f8fab2fd6c67ec0d48c">tbb::spin_mutex</a><li>internal_copy()
 : <a class="el" href="a00281.html#72c9c9e9655fcf096f5f0ed9c8ba6669">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_equal_range()
 : <a class="el" href="a00281.html#8f5373b8e1864619d1ffcf3bf3f1f13d">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_fast_find()
 : <a class="el" href="a00281.html#2f76ed101a0ccc8875b846c2f747897e">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>internal_forward_task()
diff --git a/doc/html/functions_func_0x6f.html b/doc/html/functions_func_0x6f.html
index e77b355..9bd06cc 100644
--- a/doc/html/functions_func_0x6f.html
+++ b/doc/html/functions_func_0x6f.html
@@ -68,7 +68,7 @@
 : <a class="el" href="a00363.html#cd9814947902e26463a69a111530f81b">tbb::tick_count::interval_t</a><li>operator-=()
 : <a class="el" href="a00363.html#35ff7eaf7c2031b4a991402ac9ecb940">tbb::tick_count::interval_t</a><li>operator->()
 : <a class="el" href="a00282.html#a807920cdffe3ec5c5e282b4d1ff92a2">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::accessor</a>, <a class="el" href="a00284.html#3d03a48ecb8cd9549bd8be64b09c9b0d">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::const_accessor</a><li>operator=()
-: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
+: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#f765500afa7f55480f3991cfbe826b28">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
 : <a class="el" href="a00288.html#c6fade5c732cc95274d1d8277ea619d1">tbb::concurrent_vector< T, A ></a></ul>
 <hr>
 <p></p>
diff --git a/doc/html/globals.html b/doc/html/globals.html
index 8c6e928..5f7ed0c 100644
--- a/doc/html/globals.html
+++ b/doc/html/globals.html
@@ -31,7 +31,8 @@ Here is a list of all documented file members with links to the documentation:
 <li>scalable_aligned_free()
 : <a class="el" href="a00443.html#g65a20e812012f15ec7442d5b45d0cba5">scalable_allocator.h</a><li>scalable_aligned_malloc()
 : <a class="el" href="a00443.html#gc1c7aaa1fe85c17ba5a3a96f7e8d89e7">scalable_allocator.h</a><li>scalable_aligned_realloc()
-: <a class="el" href="a00443.html#gbaea91376be80dfabd7c93eaffd9abaa">scalable_allocator.h</a><li>scalable_allocation_mode()
+: <a class="el" href="a00443.html#gbaea91376be80dfabd7c93eaffd9abaa">scalable_allocator.h</a><li>scalable_allocation_command()
+: <a class="el" href="a00443.html#gcdbd064aec22571ec84e906166a831a2">scalable_allocator.h</a><li>scalable_allocation_mode()
 : <a class="el" href="a00443.html#gb9ee52ffc5400f15c3d8af8c7613c05a">scalable_allocator.h</a><li>scalable_calloc()
 : <a class="el" href="a00443.html#g3f5a2fde0bcaa3eda35be32c8658f444">scalable_allocator.h</a><li>scalable_free()
 : <a class="el" href="a00443.html#gca3579c21244dba9f0c351e5984d4565">scalable_allocator.h</a><li>scalable_malloc()
diff --git a/doc/html/globals_func.html b/doc/html/globals_func.html
index 65325a7..2984574 100644
--- a/doc/html/globals_func.html
+++ b/doc/html/globals_func.html
@@ -31,7 +31,8 @@
 <li>scalable_aligned_free()
 : <a class="el" href="a00443.html#g65a20e812012f15ec7442d5b45d0cba5">scalable_allocator.h</a><li>scalable_aligned_malloc()
 : <a class="el" href="a00443.html#gc1c7aaa1fe85c17ba5a3a96f7e8d89e7">scalable_allocator.h</a><li>scalable_aligned_realloc()
-: <a class="el" href="a00443.html#gbaea91376be80dfabd7c93eaffd9abaa">scalable_allocator.h</a><li>scalable_allocation_mode()
+: <a class="el" href="a00443.html#gbaea91376be80dfabd7c93eaffd9abaa">scalable_allocator.h</a><li>scalable_allocation_command()
+: <a class="el" href="a00443.html#gcdbd064aec22571ec84e906166a831a2">scalable_allocator.h</a><li>scalable_allocation_mode()
 : <a class="el" href="a00443.html#gb9ee52ffc5400f15c3d8af8c7613c05a">scalable_allocator.h</a><li>scalable_calloc()
 : <a class="el" href="a00443.html#g3f5a2fde0bcaa3eda35be32c8658f444">scalable_allocator.h</a><li>scalable_free()
 : <a class="el" href="a00443.html#gca3579c21244dba9f0c351e5984d4565">scalable_allocator.h</a><li>scalable_malloc()
diff --git a/doc/html/namespacemembers.html b/doc/html/namespacemembers.html
index c7a3f12..17ced4f 100644
--- a/doc/html/namespacemembers.html
+++ b/doc/html/namespacemembers.html
@@ -23,6 +23,7 @@
   <ul>
     <li id="current"><a href="namespacemembers.html"><span>All</span></a></li>
     <li><a href="namespacemembers_func.html"><span>Functions</span></a></li>
+    <li><a href="namespacemembers_type.html"><span>Typedefs</span></a></li>
     <li><a href="namespacemembers_enum.html"><span>Enumerations</span></a></li>
     <li><a href="namespacemembers_eval.html"><span>Enumerator</span></a></li>
   </ul>
@@ -46,7 +47,8 @@ Here is a list of all documented namespace members with links to the namespaces
 : <a class="el" href="a00441.html#g62fde400a37bbca1a2fddc8e3d22f556">tbb</a><li>parallel_sort()
 : <a class="el" href="a00441.html#gc7576f82fdedc8a701a6c17ad9415926">tbb</a><li>relaxed
 : <a class="el" href="a00428.html#a8686246bb5d3664bd07563749970fefaba4b1574646947f1b519188a93c1900">tbb</a><li>release
-: <a class="el" href="a00428.html#a8686246bb5d3664bd07563749970fefaa1fa107db0245c41fb109d976ae8d70">tbb</a><li>TBB_runtime_interface_version()
+: <a class="el" href="a00428.html#a8686246bb5d3664bd07563749970fefaa1fa107db0245c41fb109d976ae8d70">tbb</a><li>speculative_spin_mutex
+: <a class="el" href="a00444.html#g645a9ea2a448def7e529d4ba0c6f39b8">tbb</a><li>TBB_runtime_interface_version()
 : <a class="el" href="a00428.html#a6858b22e90041c9c4669674ff39b056">tbb</a></ul>
 <hr>
 <p></p>
diff --git a/doc/html/namespacemembers_enum.html b/doc/html/namespacemembers_enum.html
index 137232c..9c03df7 100644
--- a/doc/html/namespacemembers_enum.html
+++ b/doc/html/namespacemembers_enum.html
@@ -23,6 +23,7 @@
   <ul>
     <li><a href="namespacemembers.html"><span>All</span></a></li>
     <li><a href="namespacemembers_func.html"><span>Functions</span></a></li>
+    <li><a href="namespacemembers_type.html"><span>Typedefs</span></a></li>
     <li id="current"><a href="namespacemembers_enum.html"><span>Enumerations</span></a></li>
     <li><a href="namespacemembers_eval.html"><span>Enumerator</span></a></li>
   </ul>
diff --git a/doc/html/namespacemembers_eval.html b/doc/html/namespacemembers_eval.html
index 48ba999..e2a680f 100644
--- a/doc/html/namespacemembers_eval.html
+++ b/doc/html/namespacemembers_eval.html
@@ -23,6 +23,7 @@
   <ul>
     <li><a href="namespacemembers.html"><span>All</span></a></li>
     <li><a href="namespacemembers_func.html"><span>Functions</span></a></li>
+    <li><a href="namespacemembers_type.html"><span>Typedefs</span></a></li>
     <li><a href="namespacemembers_enum.html"><span>Enumerations</span></a></li>
     <li id="current"><a href="namespacemembers_eval.html"><span>Enumerator</span></a></li>
   </ul>
diff --git a/doc/html/namespacemembers_func.html b/doc/html/namespacemembers_func.html
index 2dc8dad..4ecc0b8 100644
--- a/doc/html/namespacemembers_func.html
+++ b/doc/html/namespacemembers_func.html
@@ -23,6 +23,7 @@
   <ul>
     <li><a href="namespacemembers.html"><span>All</span></a></li>
     <li id="current"><a href="namespacemembers_func.html"><span>Functions</span></a></li>
+    <li><a href="namespacemembers_type.html"><span>Typedefs</span></a></li>
     <li><a href="namespacemembers_enum.html"><span>Enumerations</span></a></li>
     <li><a href="namespacemembers_eval.html"><span>Enumerator</span></a></li>
   </ul>
diff --git a/doc/html/namespacemembers_enum.html b/doc/html/namespacemembers_type.html
similarity index 84%
copy from doc/html/namespacemembers_enum.html
copy to doc/html/namespacemembers_type.html
index 137232c..4a4248b 100644
--- a/doc/html/namespacemembers_enum.html
+++ b/doc/html/namespacemembers_type.html
@@ -23,16 +23,16 @@
   <ul>
     <li><a href="namespacemembers.html"><span>All</span></a></li>
     <li><a href="namespacemembers_func.html"><span>Functions</span></a></li>
-    <li id="current"><a href="namespacemembers_enum.html"><span>Enumerations</span></a></li>
+    <li id="current"><a href="namespacemembers_type.html"><span>Typedefs</span></a></li>
+    <li><a href="namespacemembers_enum.html"><span>Enumerations</span></a></li>
     <li><a href="namespacemembers_eval.html"><span>Enumerator</span></a></li>
   </ul>
 </div>
  
 <p>
 <ul>
-<li>ets_key_usage_type
-: <a class="el" href="a00428.html#a8622ae61b7e7737dac26542e181178e">tbb</a><li>memory_semantics
-: <a class="el" href="a00428.html#a8686246bb5d3664bd07563749970fef">tbb</a></ul>
+<li>speculative_spin_mutex
+: <a class="el" href="a00444.html#g645a9ea2a448def7e529d4ba0c6f39b8">tbb</a></ul>
 <hr>
 <p></p>
 Copyright © 2005-2013 Intel Corporation.  All Rights Reserved.
diff --git a/examples/GettingStarted/sub_string_finder/Makefile b/examples/GettingStarted/sub_string_finder/Makefile
index 167ed03..bda6e51 100644
--- a/examples/GettingStarted/sub_string_finder/Makefile
+++ b/examples/GettingStarted/sub_string_finder/Makefile
@@ -35,19 +35,14 @@ ifneq (,$(shell which icc 2>/dev/null))
 CXX=icc
 endif # icc
 
-ifeq ($(offload), mic)
-override CXXFLAGS += -opt-report-phase:offload -D__TBB_MIC_OFFLOAD=1
-endif
-
-ifeq (,$(filter icc icpc,$(CXX)))
 TBBLIB = -ltbb
 TBBLIB_DEBUG = -ltbb_debug
-else
-TBBLIB = -tbb
-TBBLIB_DEBUG = -ltbb_debug
+
 ifeq ($(offload), mic)
-TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug -L${TBBROOT}/lib/mic/"
-endif
+override CXXFLAGS += -opt-report-phase:offload -D__TBB_MIC_OFFLOAD=1
+# Replace -ltbb with -tbb in the offload mode
+TBBLIB = -tbb
+TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug"
 endif
 
 ifeq ($(shell uname), Linux)
diff --git a/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder.vcproj b/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder.vcproj
index 97cc62e..b228041 100644
--- a/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder.vcproj
+++ b/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_extended.vcproj b/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_extended.vcproj
index 05175b2..33374e3 100644
--- a/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_extended.vcproj
+++ b/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_extended.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_pretty.vcproj b/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_pretty.vcproj
index 527a11b..6d28b05 100644
--- a/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_pretty.vcproj
+++ b/examples/GettingStarted/sub_string_finder/msvs/sub_string_finder_pretty.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/Makefile b/examples/Makefile
index 571783d..bfc325e 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -127,6 +127,7 @@ else
     export CPATH := $(CPATH):$(tbb_root)/include
 endif
 
+export CXXFLAGS
 COMMON_TARGETS := all clean release debug test perf_build perf_run
 # list of directories of examples
 EXAMPLES_DIRS := $(foreach T,$(EXAMPLES),$(dir $(T)))
@@ -142,8 +143,8 @@ $(COMMON_TARGETS):: % : $(addsuffix %,$(EXAMPLES_DIRS))
 
 # proxy rule for calling appropriate example
 $(EXAMPLES_TARGETS)::
-	@echo --------------------------------------------------
-	-$(MAKE) target="$(target)" SHELL=$(SHELL) -C $(@D)  -f $(EXAMPLE_MAKEFILE) $(notdir $@) CXX="$(CPLUS)" CXXFLAGS="$(CXXFLAGS)"
+	@echo ------------------------ $@ ------------------------
+	-$(MAKE) -C $(@D)  -f $(EXAMPLE_MAKEFILE) $(notdir $@) CXX="$(CPLUS)"
 
 printenv:
 ifeq ($(tbb_os),windows)
diff --git a/examples/common/gui/Makefile.win b/examples/common/gui/Makefile.win
index 3ebbb45..c5b8007 100644
--- a/examples/common/gui/Makefile.win
+++ b/examples/common/gui/Makefile.win
@@ -27,7 +27,7 @@
 # Per-build Makefile rules (for recursive $(MAKE) calls from Makefile)
 
 # Base compile/link options
-MYCXXFLAGS = /nologo /EHsc /Zc:forScope /D WIN32 /D _MBCS /D _CRT_SECURE_NO_DEPRECATE $(CXXFLAGS)
+MYCXXFLAGS = /nologo /EHsc /Zc:forScope /D WIN32 /D _MBCS /D _CRT_SECURE_NO_DEPRECATE /MP $(CXXFLAGS)
 MYLFLAGS = /link /incremental:no /fixed:no $(LFLAGS)
 CXXFLAGS_NDEBUG = /MD /O2 /Ot /Gy /D NDEBUG
 CXXFLAGS_DEBUG = /MDd /Od /Zi /D _DEBUG
diff --git a/examples/concurrent_hash_map/count_strings/msvs/count_strings.vcproj b/examples/concurrent_hash_map/count_strings/msvs/count_strings.vcproj
index 6a74e2e..797141c 100644
--- a/examples/concurrent_hash_map/count_strings/msvs/count_strings.vcproj
+++ b/examples/concurrent_hash_map/count_strings/msvs/count_strings.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/concurrent_priority_queue/shortpath/msvs/shortpath.vcproj b/examples/concurrent_priority_queue/shortpath/msvs/shortpath.vcproj
index e5463df..a1f740d 100644
--- a/examples/concurrent_priority_queue/shortpath/msvs/shortpath.vcproj
+++ b/examples/concurrent_priority_queue/shortpath/msvs/shortpath.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/graph/binpack/msvs/binpack.vcproj b/examples/graph/binpack/msvs/binpack.vcproj
index b93a9ae..2e3bb3d 100644
--- a/examples/graph/binpack/msvs/binpack.vcproj
+++ b/examples/graph/binpack/msvs/binpack.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/graph/dining_philosophers/msvs/dining_philosophers.vcproj b/examples/graph/dining_philosophers/msvs/dining_philosophers.vcproj
index d49c0b1..1474b9c 100644
--- a/examples/graph/dining_philosophers/msvs/dining_philosophers.vcproj
+++ b/examples/graph/dining_philosophers/msvs/dining_philosophers.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/graph/logic_sim/msvs/test_all.vcproj b/examples/graph/logic_sim/msvs/test_all.vcproj
index 9c6565b..2d2500a 100644
--- a/examples/graph/logic_sim/msvs/test_all.vcproj
+++ b/examples/graph/logic_sim/msvs/test_all.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/parallel_do/parallel_preorder/Makefile b/examples/parallel_do/parallel_preorder/Makefile
index 003be66..0acf3c7 100644
--- a/examples/parallel_do/parallel_preorder/Makefile
+++ b/examples/parallel_do/parallel_preorder/Makefile
@@ -29,6 +29,7 @@ run_cmd=
 PROG=parallel_preorder
 ARGS=
 PERF_RUN_ARGS=auto silent 500000  100
+LIGHT_ARGS=1:auto:+4 n-of-traversals=50
 
 # The C++ compiler
 ifneq (,$(shell which icc 2>/dev/null))
@@ -58,6 +59,9 @@ clean:
 test:
 	$(run_cmd) ./$(PROG) $(ARGS)
 
+light_test:
+	$(run_cmd) ./$(PROG) $(LIGHT_ARGS)
+
 perf_build:	release
 
 perf_run:
diff --git a/examples/parallel_do/parallel_preorder/Makefile.windows b/examples/parallel_do/parallel_preorder/Makefile.windows
index 7fd390a..2a508db 100644
--- a/examples/parallel_do/parallel_preorder/Makefile.windows
+++ b/examples/parallel_do/parallel_preorder/Makefile.windows
@@ -30,6 +30,7 @@
 PROG=Parallel_Preorder
 ARGS=
 PERF_RUN_ARGS=auto silent 500000  100
+LIGHT_ARGS=1:auto:+4 n-of-traversals=50
 
 # Trying to find if icl.exe is set
 CXX1 = $(TBB_CXX)-
@@ -49,6 +50,8 @@ clean:
 	@cmd.exe /C del $(PROG).exe *.obj *.?db *.manifest
 test:
 	$(PROG) $(ARGS)
+light_test:
+	$(PROG) $(LIGHT_ARGS)
 compiler_check:
 	@echo compiler_test>compiler_test && @$(CXX) /E compiler_test >nul 2>&1  || echo "$(CXX) command not found. Check if CXX=$(CXX) is set properly"
 	@cmd.exe /C del compiler_test
diff --git a/examples/parallel_do/parallel_preorder/msvs/parallel_preorder.vcproj b/examples/parallel_do/parallel_preorder/msvs/parallel_preorder.vcproj
index 2a0d923..9f73787 100644
--- a/examples/parallel_do/parallel_preorder/msvs/parallel_preorder.vcproj
+++ b/examples/parallel_do/parallel_preorder/msvs/parallel_preorder.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/parallel_for/game_of_life/msvs/Game_of_life.vcproj b/examples/parallel_for/game_of_life/msvs/Game_of_life.vcproj
index 554055a..031cd02 100644
--- a/examples/parallel_for/game_of_life/msvs/Game_of_life.vcproj
+++ b/examples/parallel_for/game_of_life/msvs/Game_of_life.vcproj
@@ -67,7 +67,7 @@
 				IgnoreImportLibrary="false"
 				AdditionalDependencies="tbb_debug.lib user32.lib $(NOINHERIT)"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;"
 				GenerateDebugInformation="true"
 				AssemblyDebug="1"
 				SubSystem="2"
@@ -152,7 +152,7 @@
 				IgnoreImportLibrary="false"
 				AdditionalDependencies="tbb_debug.lib user32.lib $(NOINHERIT)"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				AssemblyDebug="1"
 				SubSystem="2"
@@ -235,7 +235,7 @@
 				IgnoreImportLibrary="false"
 				AdditionalDependencies="tbb.lib user32.lib $(NOINHERIT)"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -320,7 +320,7 @@
 				IgnoreImportLibrary="false"
 				AdditionalDependencies="tbb.lib user32.lib $(NOINHERIT)"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -402,7 +402,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;"
 				GenerateDebugInformation="true"
 				ProgramDatabaseFile="$(TargetDir)$(TargetName).pdb"
 				SubSystem="1"
@@ -483,7 +483,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;"
 				GenerateDebugInformation="true"
 				ProgramDatabaseFile="$(TargetDir)$(TargetName).pdb"
 				SubSystem="1"
@@ -562,7 +562,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -643,7 +643,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/parallel_for/polygon_overlay/msvs/pover.vcproj b/examples/parallel_for/polygon_overlay/msvs/pover.vcproj
index ac773e3..e474903 100644
--- a/examples/parallel_for/polygon_overlay/msvs/pover.vcproj
+++ b/examples/parallel_for/polygon_overlay/msvs/pover.vcproj
@@ -67,7 +67,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="1"
@@ -149,7 +149,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="17"
@@ -232,7 +232,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -317,7 +317,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -403,7 +403,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -489,7 +489,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -574,7 +574,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;"$(DXSDK_DIR)\lib\x86""
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;"$(DXSDK_DIR)\lib\x86""
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="1"
@@ -657,7 +657,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;"$(DXSDK_DIR)\lib\x64""
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;"$(DXSDK_DIR)\lib\x64""
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="17"
diff --git a/examples/parallel_for/seismic/Makefile b/examples/parallel_for/seismic/Makefile
index f2f7dc9..1fb701c 100644
--- a/examples/parallel_for/seismic/Makefile
+++ b/examples/parallel_for/seismic/Makefile
@@ -26,7 +26,7 @@
 
 # GNU Makefile that builds and runs example.
 NAME=seismic
-ARGS=0:4 300
+ARGS=0:auto 300
 PERF_RUN_ARGS=auto 10000 silent
 LIGHT_ARGS=1:2 100
 
@@ -35,27 +35,21 @@ ifneq (,$(shell which icc 2>/dev/null))
 CXX=icc
 endif # icc
 
+TBBLIB = -ltbb
+TBBLIB_DEBUG = -ltbb_debug
+
 ifeq ($(offload), mic)
-override CXXFLAGS += -D__TBB_MIC_OFFLOAD=1
+override CXXFLAGS += -D__TBB_MIC_OFFLOAD=1 -offload-attribute-target=mic
+# Replace -ltbb with -tbb in the offload mode
+TBBLIB = -tbb
+TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug"
 # Currently only console mode is supported in offload version
 override UI=con
-SOURCES = $(UI)video.o universe.cpp seismic_video.cpp main.cpp
-OFFLOAD_SOURCES = ../../common/gui/$(UI)video.cpp
 else
-SOURCES = ../../common/gui/$(UI)video.cpp universe.cpp seismic_video.cpp main.cpp
 PERFFLAGS=-msse2
 endif
 
-ifeq (,$(filter icc icpc,$(CXX)))
-TBBLIB = -ltbb
-TBBLIB_DEBUG = -ltbb_debug
-else
-TBBLIB = -tbb
-TBBLIB_DEBUG = -ltbb_debug
-ifeq ($(offload), mic)
-TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug -L${TBBROOT}/lib/mic/"
-endif
-endif
+SOURCES = ../../common/gui/$(UI)video.cpp universe.cpp seismic_video.cpp main.cpp
 
 include ../../common/gui/Makefile.gmake
 override CXXFLAGS += $(UI_CXXFLAGS)
@@ -75,18 +69,12 @@ ifeq ($(UI),mac)
 endif # OS X*
 
 release: resources
-ifeq ($(offload), mic)
-	$(CXX) -O2 -DNDEBUG $(CXXFLAGS) -c -offload-attribute-target=mic $(OFFLOAD_SOURCES)
-endif
 ifeq ($(UI),mac)
 	$(CXX_UI) -O2 -DNDEBUG $(CXXFLAGS) -c $(MACUISOURCES)
 endif # OS X*
 	$(CXX) -O2 -DNDEBUG $(CXXFLAGS) -o $(EXE) $(SOURCES) $(MACUIOBJS) $(TBBLIB) $(LIBS)
 
 debug: resources
-ifeq ($(offload), mic)
-	$(CXX) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -c -offload-attribute-target=mic $(OFFLOAD_SOURCES)
-endif
 ifeq ($(UI),mac)
 	$(CXX_UI) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -c $(MACUISOURCES)
 endif # OS X*
diff --git a/examples/parallel_for/seismic/main.cpp b/examples/parallel_for/seismic/main.cpp
index 0f2bbfd..d2b1458 100644
--- a/examples/parallel_for/seismic/main.cpp
+++ b/examples/parallel_for/seismic/main.cpp
@@ -32,17 +32,11 @@
 #include "tbb/tick_count.h"
 #include "../../common/utility/utility.h"
 
-#if __TBB_MIC_OFFLOAD
-#pragma offload_attribute (push,target(mic))
-#endif // __TBB_MIC_OFFLOAD
 #include "seismic_video.h"
 #include "universe.h"
 #include "tbb/task_scheduler_init.h"
 
 Universe u;
-#if __TBB_MIC_OFFLOAD
-#pragma offload_attribute (pop)
-#endif // __TBB_MIC_OFFLOAD
 
 struct RunOptions {
     //! It is used for console mode for test with different number of threads and also has
@@ -60,9 +54,9 @@ struct RunOptions {
 
 int do_get_default_num_threads() {
     int threads;
-    #if __TBB_MIC_OFFLOAD
+#if __TBB_MIC_OFFLOAD
     #pragma offload target(mic) out(threads)
-    #endif // __TBB_MIC_OFFLOAD
+#endif // __TBB_MIC_OFFLOAD
     threads = tbb::task_scheduler_init::default_num_threads();
     return threads;
 }
@@ -113,7 +107,7 @@ int main(int argc, char *argv[])
                 tbb::tick_count xwayParallelismStartTime = tbb::tick_count::now();
                 u.InitializeUniverse(video);
                 int numberOfFrames = options.numberOfFrames;
-                #if __TBB_MIC_OFFLOAD
+#if __TBB_MIC_OFFLOAD
                 drawing_memory dmem = video.get_drawing_memory();
                 char *pMem = dmem.get_address();
                 size_t memSize = dmem.get_size();
@@ -124,21 +118,21 @@ int main(int argc, char *argv[])
                     // since the address spaces on host and on target are different
                     dmem.set_address(pMem);
                     u.SetDrawingMemory(dmem);
-                #endif // __TBB_MIC_OFFLOAD
-                if (p==0){
-                    //run a serial version
-                    for( int i=0; i<numberOfFrames; ++i ){
-                        u.SerialUpdateUniverse();
-                    }
-                }else{
-                    tbb::task_scheduler_init init(p);
-                    for( int i=0; i<numberOfFrames; ++i ){
-                        u.ParallelUpdateUniverse();
+#endif // __TBB_MIC_OFFLOAD
+                    if (p==0){
+                        //run a serial version
+                        for( int i=0; i<numberOfFrames; ++i ){
+                            u.SerialUpdateUniverse();
+                        }
+                    }else{
+                        tbb::task_scheduler_init init(p);
+                        for( int i=0; i<numberOfFrames; ++i ){
+                            u.ParallelUpdateUniverse();
+                        }
                     }
+#if __TBB_MIC_OFFLOAD
                 }
-                #if __TBB_MIC_OFFLOAD
-                }
-                #endif // __TBB_MIC_OFFLOAD
+#endif // __TBB_MIC_OFFLOAD
 
                 if (!options.silent){
                     double fps =  options.numberOfFrames/((tbb::tick_count::now()-xwayParallelismStartTime).seconds());
diff --git a/examples/parallel_for/seismic/msvs/SeismicSimulation.vcproj b/examples/parallel_for/seismic/msvs/SeismicSimulation.vcproj
index 61e7535..e0683b4 100644
--- a/examples/parallel_for/seismic/msvs/SeismicSimulation.vcproj
+++ b/examples/parallel_for/seismic/msvs/SeismicSimulation.vcproj
@@ -67,7 +67,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="1"
@@ -149,7 +149,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="17"
@@ -232,7 +232,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -317,7 +317,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -403,7 +403,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -489,7 +489,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -574,7 +574,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="1"
@@ -657,7 +657,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="17"
diff --git a/examples/parallel_for/seismic/universe.cpp b/examples/parallel_for/seismic/universe.cpp
index 8665269..0344deb 100644
--- a/examples/parallel_for/seismic/universe.cpp
+++ b/examples/parallel_for/seismic/universe.cpp
@@ -26,10 +26,6 @@
     the GNU General Public License.
 */
 
-#if __TBB_MIC_OFFLOAD
-#pragma offload_attribute (target(mic))
-#endif // __TBB_MIC_OFFLOAD
-
 #include "../../common/gui/video.h"
 #include <cmath>
 #include "tbb/blocked_range.h"
diff --git a/examples/parallel_for/tachyon/Makefile.windows b/examples/parallel_for/tachyon/Makefile.windows
index 99158ef..2443586 100644
--- a/examples/parallel_for/tachyon/Makefile.windows
+++ b/examples/parallel_for/tachyon/Makefile.windows
@@ -86,7 +86,9 @@ LIBS_TBB_DEBUG =  tbb_debug.lib $(LIBS)
 
 
 MAKEINC = ../../common/gui/Makefile.win
-SOURCE = src/main.cpp src/pthread.cpp src/tachyon_video.cpp src/api.cpp src/apigeom.cpp src/apitrigeom.cpp src/bndbox.cpp src/box.cpp src/camera.cpp src/coordsys.cpp src/cylinder.cpp src/extvol.cpp src/global.cpp src/grid.cpp src/imageio.cpp src/imap.cpp src/intersect.cpp src/jpeg.cpp src/light.cpp src/objbound.cpp src/parse.cpp src/plane.cpp src/ppm.cpp src/quadric.cpp src/render.cpp src/ring.cpp src/shade.cpp src/sphere.cpp src/texture.cpp src/tgafile.cpp src/trace_rest.cpp src/triangl [...]
+# src/main.cpp and src/tachyon_video.cpp cannot be included in the SOURCE_COMMON list since they depend on UI and /subsystem, which are not specified for the common SOURCE build
+SOURCE = src/main.cpp src/tachyon_video.cpp
+SOURCE_COMMON = src/pthread.cpp  src/api.cpp src/apigeom.cpp src/apitrigeom.cpp src/bndbox.cpp src/box.cpp src/camera.cpp src/coordsys.cpp src/cylinder.cpp src/extvol.cpp src/global.cpp src/grid.cpp src/imageio.cpp src/imap.cpp src/intersect.cpp src/jpeg.cpp src/light.cpp src/objbound.cpp src/parse.cpp src/plane.cpp src/ppm.cpp src/quadric.cpp src/render.cpp src/ring.cpp src/shade.cpp src/sphere.cpp src/texture.cpp src/tgafile.cpp src/trace_rest.cpp src/triangle.cpp src/ui.cpp src/util.c [...]
 
 # Targets
 all: build run
@@ -105,33 +107,50 @@ tbb_debug: build_tbb_debug run_tbb
 tbb1d: build_tbb1d run_tbb1d
 tbb1d_debug: build_tbb1d_debug run_tbb1d
 
-build_serial:
-	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.serial.cpp $(SOURCE)" EXE=tachyon.serial.exe RCNAME=gui UI=$(UI) CXX="$(CXX)" CXXFLAGS=$(CXXFLAGS) XARCH=$(XARCH) build_one
-build_serial_debug:
-	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.serial.cpp $(SOURCE)" EXE=tachyon.serial.exe RCNAME=gui UI=$(UI) DEBUG=_debug CXX="$(CXX)" CXXFLAGS=$(CXXFLAGS) XARCH=$(XARCH) build_one
+CXXFLAGS_COMMON = /c /nologo /EHsc /Zc:forScope /D WIN32 /D _MBCS /D _CRT_SECURE_NO_DEPRECATE /Foobj/ /MP $(CXXFLAGS)
+CXXFLAGS_NDEBUG = /MD /O2 /Ot /Gy /D NDEBUG
+CXXFLAGS_DEBUG = /MDd /Od /Zi /D _DEBUG
+LIB_LINK_FLAGS = /nologo /machine:$(XARCH)
+
+tachyon_common.lib:
+	@cmd.exe /C if not exist obj mkdir obj
+	$(CXX) $(CXXFLAGS_NDEBUG) $(CXXFLAGS_COMMON) $(SOURCE_COMMON)
+	LIB $(LIB_LINK_FLAGS) obj/*.obj /OUT:$@
+	@cmd.exe /C if exist obj rmdir /S /Q obj
+
+tachyon_common_debug.lib:
+	@cmd.exe /C if not exist obj mkdir obj
+	$(CXX) $(CXXFLAGS_DEBUG) $(CXXFLAGS_COMMON) $(SOURCE_COMMON)
+	LIB $(LIB_LINK_FLAGS) obj/*.obj /OUT:$@
+	@cmd.exe /C if exist obj rmdir /S /Q obj
+
+build_serial: tachyon_common.lib
+	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.serial.cpp $(SOURCE) tachyon_common.lib" EXE=tachyon.serial.exe RCNAME=gui UI=$(UI) CXX="$(CXX)" CXXFLAGS="/GL $(CXXFLAGS)" XARCH=$(XARCH) build_one
+build_serial_debug: tachyon_common_debug.lib
+	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.serial.cpp $(SOURCE) tachyon_common_debug.lib" EXE=tachyon.serial.exe RCNAME=gui UI=$(UI) DEBUG=_debug CXX="$(CXX)" CXXFLAGS=$(CXXFLAGS) XARCH=$(XARCH) build_one
 run_serial:
 	-.\tachyon.serial.exe $(ARGS)
 
-build_tbb:
-	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb.cpp $(SOURCE)" EXE=tachyon.tbb.exe RCNAME=gui UI=$(UI) CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_NDEBUG)" LFLAGS="$(LIBS_TBB_NDEBUG)" XARCH=$(XARCH) build_one
-build_tbb_debug:
-	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb.cpp $(SOURCE)" EXE=tachyon.tbb.exe RCNAME=gui UI=$(UI) DEBUG=_debug CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_DEBUG)" LFLAGS="$(LIBS_TBB_DEBUG)" XARCH=$(XARCH) build_one
+build_tbb: tachyon_common.lib
+	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb.cpp $(SOURCE) tachyon_common.lib" EXE=tachyon.tbb.exe RCNAME=gui UI=$(UI) CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_NDEBUG)" LFLAGS="$(LIBS_TBB_NDEBUG)" XARCH=$(XARCH) build_one
+build_tbb_debug: tachyon_common_debug.lib
+	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb.cpp $(SOURCE) tachyon_common_debug.lib" EXE=tachyon.tbb.exe RCNAME=gui UI=$(UI) DEBUG=_debug CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_DEBUG)" LFLAGS="$(LIBS_TBB_DEBUG)" XARCH=$(XARCH) build_one
 run_tbb:
 	-.\tachyon.tbb.exe $(ARGS)
 
-build_tbb1d:
-	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb1d.cpp $(SOURCE)" EXE=tachyon.tbb1d.exe RCNAME=gui UI=$(UI) CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_NDEBUG)" LFLAGS="$(LIBS_TBB_NDEBUG)" XARCH=$(XARCH) build_one
-build_tbb1d_debug:
-	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb1d.cpp $(SOURCE)" EXE=tachyon.tbb1d.exe RCNAME=gui UI=$(UI) DEBUG=_debug CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_DEBUG)" LFLAGS="$(LIBS_TBB_DEBUG)" XARCH=$(XARCH) build_one
+build_tbb1d: tachyon_common.lib
+	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb1d.cpp $(SOURCE) tachyon_common.lib" EXE=tachyon.tbb1d.exe RCNAME=gui UI=$(UI) CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_NDEBUG)" LFLAGS="$(LIBS_TBB_NDEBUG)" XARCH=$(XARCH) build_one
+build_tbb1d_debug: tachyon_common_debug.lib
+	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb1d.cpp $(SOURCE) tachyon_common_debug.lib" EXE=tachyon.tbb1d.exe RCNAME=gui UI=$(UI) DEBUG=_debug CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_DEBUG)" LFLAGS="$(LIBS_TBB_DEBUG)" XARCH=$(XARCH) build_one
 run_tbb1d:
 	-.\tachyon.tbb1d.exe $(ARGS)
 
 
 clean:
-	@cmd.exe /C del tachyon.* *.manifest *.obj msvs\gui.res *.?db
+	@cmd.exe /C del tachyon.* *.manifest *.obj *.lib msvs\gui.res *.?db
 
-perf_build:
-	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb.cpp $(SOURCE)" EXE=tachyon.tbb.exe RCNAME=gui UI=con CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_NDEBUG)" LFLAGS="$(LIBS_TBB_NDEBUG)" XARCH=$(XARCH) build_one
+perf_build: tachyon_common.lib
+	@$(MAKE) -f $(MAKEINC) SOURCE="src/trace.tbb.cpp $(SOURCE) tachyon_common.lib" EXE=tachyon.tbb.exe RCNAME=gui UI=con CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS_TBB_NDEBUG)" LFLAGS="$(LIBS_TBB_NDEBUG)" XARCH=$(XARCH) build_one
 perf_run:
 	-.\tachyon.tbb.exe $(PERF_RUN_ARGS)
 
diff --git a/examples/parallel_for/tachyon/msvs/tachyon.tbb.vcproj b/examples/parallel_for/tachyon/msvs/tachyon.tbb.vcproj
index 2e4a465..0afcccf 100644
--- a/examples/parallel_for/tachyon/msvs/tachyon.tbb.vcproj
+++ b/examples/parallel_for/tachyon/msvs/tachyon.tbb.vcproj
@@ -71,7 +71,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
@@ -157,7 +157,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
@@ -243,7 +243,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -329,7 +329,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -416,7 +416,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -502,7 +502,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -590,7 +590,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
@@ -676,7 +676,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
diff --git a/examples/parallel_for/tachyon/msvs/tachyon.tbb1d.vcproj b/examples/parallel_for/tachyon/msvs/tachyon.tbb1d.vcproj
index 5044e56..70c9c77 100644
--- a/examples/parallel_for/tachyon/msvs/tachyon.tbb1d.vcproj
+++ b/examples/parallel_for/tachyon/msvs/tachyon.tbb1d.vcproj
@@ -71,7 +71,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
@@ -157,7 +157,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
@@ -243,7 +243,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -329,7 +329,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -416,7 +416,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -502,7 +502,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -590,7 +590,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
@@ -676,7 +676,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				LinkTimeCodeGeneration="1"
diff --git a/examples/parallel_reduce/convex_hull/msvs/convex_hull_benchmark.vcproj b/examples/parallel_reduce/convex_hull/msvs/convex_hull_benchmark.vcproj
index f6b9c06..922b81e 100644
--- a/examples/parallel_reduce/convex_hull/msvs/convex_hull_benchmark.vcproj
+++ b/examples/parallel_reduce/convex_hull/msvs/convex_hull_benchmark.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -219,7 +219,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -299,7 +299,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/parallel_reduce/convex_hull/msvs/convex_hull_sample.vcproj b/examples/parallel_reduce/convex_hull/msvs/convex_hull_sample.vcproj
index 10f0eae..635d86d 100644
--- a/examples/parallel_reduce/convex_hull/msvs/convex_hull_sample.vcproj
+++ b/examples/parallel_reduce/convex_hull/msvs/convex_hull_sample.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -219,7 +219,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -299,7 +299,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/parallel_reduce/primes/Makefile b/examples/parallel_reduce/primes/Makefile
index dee9641..184e230 100644
--- a/examples/parallel_reduce/primes/Makefile
+++ b/examples/parallel_reduce/primes/Makefile
@@ -35,19 +35,14 @@ ifneq (,$(shell which icc 2>/dev/null))
 CXX=icc
 endif # icc
 
-ifeq ($(offload), mic)
-override CXXFLAGS += -D__TBB_MIC_OFFLOAD=1
-endif
-
-ifeq (,$(filter icc icpc,$(CXX)))
 TBBLIB = -ltbb
 TBBLIB_DEBUG = -ltbb_debug
-else
-TBBLIB = -tbb
-TBBLIB_DEBUG = -ltbb_debug
+
 ifeq ($(offload), mic)
-TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug -L${TBBROOT}/lib/mic/"
-endif
+override CXXFLAGS += -D__TBB_MIC_OFFLOAD=1
+# Replace -ltbb with -tbb in the offload mode
+TBBLIB = -tbb
+TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug"
 endif
 
 ifeq ($(shell uname), Linux)
diff --git a/examples/parallel_reduce/primes/msvs/primes.vcproj b/examples/parallel_reduce/primes/msvs/primes.vcproj
index 74d2ef2..0cabc37 100644
--- a/examples/parallel_reduce/primes/msvs/primes.vcproj
+++ b/examples/parallel_reduce/primes/msvs/primes.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/pipeline/square/msvs/square.vcproj b/examples/pipeline/square/msvs/square.vcproj
index 94087ff..bfaad42 100644
--- a/examples/pipeline/square/msvs/square.vcproj
+++ b/examples/pipeline/square/msvs/square.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/task/tree_sum/msvs/tree_sum.vcproj b/examples/task/tree_sum/msvs/tree_sum.vcproj
index ffe61c6..c80413e 100644
--- a/examples/task/tree_sum/msvs/tree_sum.vcproj
+++ b/examples/task/tree_sum/msvs/tree_sum.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/task_group/sudoku/Makefile b/examples/task_group/sudoku/Makefile
index eb4a155..2ac56de 100644
--- a/examples/task_group/sudoku/Makefile
+++ b/examples/task_group/sudoku/Makefile
@@ -35,19 +35,16 @@ ifneq (,$(shell which icc 2>/dev/null))
 CXX=icc
 endif # which icc
 
-ifeq ($(offload), mic)
-override CXXFLAGS += -D__TBB_MIC_OFFLOAD=1
-endif
-
-ifeq (,$(filter icc icpc,$(CXX)))
 TBBLIB = -ltbb
 TBBLIB_DEBUG = -ltbb_debug
-else
-TBBLIB = -tbb
+
+ifneq (,$(filter icc icpc,$(CXX)))
 CXX0XFLAGS?=-std=c++0x -D_TBB_CPP0X
-TBBLIB_DEBUG = -ltbb_debug
 ifeq ($(offload), mic)
-TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug -L${TBBROOT}/lib/mic/"
+override CXXFLAGS += -D__TBB_MIC_OFFLOAD=1
+# Replace -ltbb with -tbb in the offload mode
+TBBLIB = -tbb
+TBBLIB_DEBUG += -offload-option,mic,ld,"-ltbb_debug"
 endif
 endif
 
diff --git a/examples/task_group/sudoku/msvs/sudoku.vcproj b/examples/task_group/sudoku/msvs/sudoku.vcproj
index a84ca15..abf39bd 100644
--- a/examples/task_group/sudoku/msvs/sudoku.vcproj
+++ b/examples/task_group/sudoku/msvs/sudoku.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib tbbmalloc_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib tbbmalloc.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/examples/task_priority/fractal/msvs/fractal.vcproj b/examples/task_priority/fractal/msvs/fractal.vcproj
index 2c10b70..909b1ec 100644
--- a/examples/task_priority/fractal/msvs/fractal.vcproj
+++ b/examples/task_priority/fractal/msvs/fractal.vcproj
@@ -67,7 +67,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="1"
@@ -149,7 +149,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="17"
@@ -232,7 +232,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -317,7 +317,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -403,7 +403,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -489,7 +489,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
@@ -574,7 +574,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8;$(DXSDK_DIR)\lib\x86"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9;$(DXSDK_DIR)\lib\x86"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="1"
@@ -657,7 +657,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8;$(DXSDK_DIR)\lib\x64"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9;$(DXSDK_DIR)\lib\x64"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				TargetMachine="17"
diff --git a/examples/test_all/fibonacci/msvs/fibonacci.vcproj b/examples/test_all/fibonacci/msvs/fibonacci.vcproj
index 7d38fd0..186c37f 100644
--- a/examples/test_all/fibonacci/msvs/fibonacci.vcproj
+++ b/examples/test_all/fibonacci/msvs/fibonacci.vcproj
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="1"
@@ -141,7 +141,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb_debug.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				TargetMachine="17"
@@ -218,7 +218,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc8;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\ia32\vc_mt;$(TBBROOT)\lib\ia32\vc9;$(SolutionDir)\..\..\..\..\lib\ia32\vc_mt;$(SolutionDir)\..\..\..\..\lib\ia32\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
@@ -297,7 +297,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="tbb.lib"
 				LinkIncremental="1"
-				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc8;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc8"
+				AdditionalLibraryDirectories="$(TBBROOT)\lib\intel64\vc_mt;$(TBBROOT)\lib\intel64\vc9;$(SolutionDir)\..\..\..\..\lib\intel64\vc_mt;$(SolutionDir)\..\..\..\..\lib\intel64\vc9"
 				GenerateDebugInformation="true"
 				SubSystem="1"
 				OptimizeReferences="2"
diff --git a/include/tbb/concurrent_hash_map.h b/include/tbb/concurrent_hash_map.h
index c5e7946..4acc9b6 100644
--- a/include/tbb/concurrent_hash_map.h
+++ b/include/tbb/concurrent_hash_map.h
@@ -53,6 +53,9 @@
 #include "tbb_exception.h"
 #include "tbb_profiling.h"
 #include "internal/_concurrent_unordered_impl.h" // Need tbb_hasher
+#if __TBB_INITIALIZER_LISTS_PRESENT
+#include <initializer_list>
+#endif
 #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
 #include <typeinfo>
 #endif
@@ -762,6 +765,17 @@ public:
         internal_copy(first, last);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Construct table by copying all elements of an initializer list.
+    concurrent_hash_map(const std::initializer_list<value_type> &il, const allocator_type &a = allocator_type())
+        : my_allocator(a)
+    {
+        reserve(il.size());
+        internal_copy(il.begin(), il.end());
+    }
+
+#endif //__TBB_INITIALIZER_LISTS_PRESENT
+
     //! Assignment
     concurrent_hash_map& operator=( const concurrent_hash_map& table ) {
         if( this!=&table ) {
@@ -771,6 +785,16 @@ public:
         return *this;
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Assignment from an initializer list: replaces the current contents with the list's elements.
+    concurrent_hash_map& operator=( const std::initializer_list<value_type> &il ) {
+        clear();
+        reserve(il.size());
+        internal_copy(il.begin(), il.end());
+        return *this;
+    }
+#endif //__TBB_INITIALIZER_LISTS_PRESENT
+
 
     //! Rehashes and optionally resizes the whole table.
     /** Useful to optimize performance before or after concurrent operations.
diff --git a/include/tbb/concurrent_unordered_map.h b/include/tbb/concurrent_unordered_map.h
index a226ac3..714a5ab 100644
--- a/include/tbb/concurrent_unordered_map.h
+++ b/include/tbb/concurrent_unordered_map.h
@@ -141,6 +141,17 @@ public:
             base_type::insert(*first);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Constructor from initializer_list
+    concurrent_unordered_map(std::initializer_list<value_type> const& il, size_type n_of_buckets = 8,
+        const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
+        const allocator_type& a = allocator_type())
+        : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
+    {
+        this->insert(il.begin(),il.end());
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     concurrent_unordered_map(const concurrent_unordered_map& table) : base_type(table)
     {
     }
@@ -156,6 +167,15 @@ public:
         return (*this);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! assignment operator from initializer_list
+    concurrent_unordered_map& operator=(std::initializer_list<value_type> const& il)
+    {
+        base_type::operator=(il);
+        return (*this);
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     iterator unsafe_erase(const_iterator where)
     {
         return base_type::unsafe_erase(where);
@@ -288,6 +308,17 @@ public:
             base_type::insert(*first);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Constructor from initializer_list
+    concurrent_unordered_multimap(std::initializer_list<value_type> const& il, size_type n_of_buckets = 8,
+        const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
+        const allocator_type& a = allocator_type())
+        : base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
+    {
+        this->insert(il.begin(),il.end());
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     concurrent_unordered_multimap(const concurrent_unordered_multimap& table) : base_type(table)
     {
     }
@@ -303,6 +334,15 @@ public:
         return (*this);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! assignment operator from initializer_list
+    concurrent_unordered_multimap& operator=(std::initializer_list<value_type> const& il)
+    {
+        base_type::operator=(il);
+        return (*this);
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     iterator unsafe_erase(const_iterator where)
     {
         return base_type::unsafe_erase(where);
diff --git a/include/tbb/concurrent_unordered_set.h b/include/tbb/concurrent_unordered_set.h
index 21494e0..8ce4163 100644
--- a/include/tbb/concurrent_unordered_set.h
+++ b/include/tbb/concurrent_unordered_set.h
@@ -121,6 +121,16 @@ public:
             base_type::insert(*first);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Constructor from initializer_list
+   concurrent_unordered_set(std::initializer_list<value_type> const& il, size_type n_of_buckets = 8, const hasher& a_hasher = hasher(),
+        const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
+        : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
+    {
+        this->insert(il.begin(),il.end());
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     concurrent_unordered_set(const concurrent_unordered_set& table) : base_type(table)
     {
     }
@@ -136,6 +146,15 @@ public:
         return (*this);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! assignment operator from initializer_list
+    concurrent_unordered_set& operator=(std::initializer_list<value_type> const& il)
+    {
+        base_type::operator=(il);
+        return (*this);
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     iterator unsafe_erase(const_iterator where)
     {
         return base_type::unsafe_erase(where);
@@ -227,6 +246,16 @@ public:
             base_type::insert(*first);
         }
     }
+    
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Constructor from initializer_list
+   concurrent_unordered_multiset(std::initializer_list<value_type> const& il, size_type n_of_buckets = 8, const hasher& a_hasher = hasher(),
+        const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
+        : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
+    {
+        this->insert(il.begin(),il.end());
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT    
 
     concurrent_unordered_multiset(const concurrent_unordered_multiset& table) : base_type(table)
     {
@@ -242,6 +271,15 @@ public:
         return (*this);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! assignment operator from initializer_list
+    concurrent_unordered_multiset& operator=(std::initializer_list<value_type> const& il)
+    {
+        base_type::operator=(il);
+        return (*this);
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     // Modifiers
     std::pair<iterator, bool> insert(const value_type& value)
     {
diff --git a/include/tbb/concurrent_vector.h b/include/tbb/concurrent_vector.h
index 41cfbc7..787278a 100644
--- a/include/tbb/concurrent_vector.h
+++ b/include/tbb/concurrent_vector.h
@@ -659,12 +659,12 @@ public:
 #if TBB_DEPRECATED
     /** Returns old size. */
     size_type grow_by( size_type delta ) {
-        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size;
+        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size.load();
     }
 #else
     /** Returns iterator pointing to the first new element. */
     iterator grow_by( size_type delta ) {
-        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size);
+        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size.load());
     }
 #endif
 
@@ -672,12 +672,12 @@ public:
 #if TBB_DEPRECATED
     /** Returns old size. */
     size_type grow_by( size_type delta, const_reference t ) {
-        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size;
+        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size.load();
     }
 #else
     /** Returns iterator pointing to the first new element. */
     iterator grow_by( size_type delta, const_reference t ) {
-        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size);
+        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size.load());
     }
 #endif
 
diff --git a/include/tbb/flow_graph.h b/include/tbb/flow_graph.h
index cd0550c..75e05a5 100644
--- a/include/tbb/flow_graph.h
+++ b/include/tbb/flow_graph.h
@@ -39,6 +39,12 @@
 #include "concurrent_vector.h"
 #include "internal/_aggregator_impl.h"
 
+#if TBB_DEPRECATED_FLOW_ENQUEUE 
+#define FLOW_SPAWN(a) tbb::task::enqueue((a))
+#else
+#define FLOW_SPAWN(a) tbb::task::spawn((a))
+#endif
+
 // use the VC10 or gcc version of tuple if it is available.
 #if __TBB_CPP11_TUPLE_PRESENT
     #include <tuple>
@@ -134,7 +140,7 @@ static inline tbb::task *combine_tasks( tbb::task * left, tbb::task * right) {
     // left contains a task
     if(right != SUCCESSFULLY_ENQUEUED) {
         // both are valid tasks
-        tbb::task::enqueue(*left);
+        FLOW_SPAWN(*left);
         return right;
     }
     return left;
@@ -157,7 +163,7 @@ public:
     bool try_put( const T& t ) {
             task *res = try_put_task(t);
             if(!res) return false;
-            if (res != SUCCESSFULLY_ENQUEUED) task::enqueue(*res);
+            if (res != SUCCESSFULLY_ENQUEUED) FLOW_SPAWN(*res);
             return true;
         }
 
@@ -421,8 +427,8 @@ public:
         that need to block a wait_for_all() on the graph.  For example a one-off source. */
     template< typename Receiver, typename Body >
         void run( Receiver &r, Body body ) {
-       task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) )
-           run_and_put_task< Receiver, Body >( r, body ) );
+       FLOW_SPAWN( (* new ( task::allocate_additional_child_of( *my_root_task ) ) 
+                   run_and_put_task< Receiver, Body >( r, body )) );
     }
 
     //! Spawns a task that runs a function object
@@ -430,8 +436,7 @@ public:
         that need to block a wait_for_all() on the graph. For example a one-off source. */
     template< typename Body >
     void run( Body body ) {
-       task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) )
-           run_task< Body >( body ) );
+       FLOW_SPAWN( * new ( task::allocate_additional_child_of( *my_root_task ) ) run_task< Body >( body ) );
     }
 
     //! Wait until graph is idle and decrement_wait_count calls equals increment_wait_count calls.
@@ -753,8 +758,8 @@ private:
 
     //! Spawns a task that applies the body
     /* override */ void spawn_put( ) {
-        task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) )
-           internal:: source_task_bypass < source_node< output_type > >( *this ) );
+        FLOW_SPAWN( (* new ( task::allocate_additional_child_of( *my_root_task ) ) 
+                    internal:: source_task_bypass < source_node< output_type > >( *this ) ) );
     }
 
     friend class internal::source_task_bypass< source_node< output_type > >;
@@ -1224,7 +1229,7 @@ protected:
     inline bool enqueue_forwarding_task(buffer_operation &op_data) {
         task *ft = grab_forwarding_task(op_data);
         if(ft) {
-            task::enqueue(*ft);
+            FLOW_SPAWN(*ft);
             return true;
         }
         return false;
@@ -1804,7 +1809,7 @@ private:
                 return;
         }
         task * rtask = decrement_counter();
-        if(rtask) task::enqueue(*rtask);
+        if(rtask) FLOW_SPAWN(*rtask);
     }
 
     task *forward_task() {
@@ -1862,10 +1867,8 @@ public:
         spin_mutex::scoped_lock lock(my_mutex);
         my_predecessors.add( src );
         if ( my_count < my_threshold && !my_successors.empty() ) {
-            task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) )
-                           internal::
-                           forward_task_bypass
-                           < limiter_node<T> >( *this ) );
+            FLOW_SPAWN( (* new ( task::allocate_additional_child_of( *my_root_task ) ) 
+                        internal::forward_task_bypass < limiter_node<T> >( *this ) ) );
         }
         return true;
     }
diff --git a/include/tbb/internal/_concurrent_unordered_impl.h b/include/tbb/internal/_concurrent_unordered_impl.h
index b6aeb7d..4df875b 100644
--- a/include/tbb/internal/_concurrent_unordered_impl.h
+++ b/include/tbb/internal/_concurrent_unordered_impl.h
@@ -58,6 +58,10 @@
 #include "../tbb_allocator.h"
 #include "tbb/atomic.h"
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    #include <initializer_list>
+#endif
+
 namespace tbb {
 namespace interface5 {
 //! @cond INTERNAL
@@ -719,6 +723,17 @@ protected:
         return (*this);
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! assignment operator from initializer_list
+    concurrent_unordered_base& operator=(std::initializer_list<value_type> const& il)
+    {
+        this->clear();
+        this->insert(il.begin(),il.end());
+        return (*this);
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
+
     ~concurrent_unordered_base() {
         // Delete all node segments
         internal_clear();
diff --git a/include/tbb/internal/_flow_graph_join_impl.h b/include/tbb/internal/_flow_graph_join_impl.h
index 751620b..a92f5b8 100644
--- a/include/tbb/internal/_flow_graph_join_impl.h
+++ b/include/tbb/internal/_flow_graph_join_impl.h
@@ -689,7 +689,7 @@ namespace internal {
                     forward_task_bypass
                     <my_node_type>(*my_node);
                 if(!handle_task) return rtask;
-                task::enqueue(*rtask);
+                FLOW_SPAWN(*rtask);
             }
             return NULL;
         }
@@ -760,7 +760,7 @@ namespace internal {
                     forward_task_bypass
                     <my_node_type>(*my_node);
                 if(!handle_task) return rtask;
-                task::enqueue( *rtask);
+                FLOW_SPAWN( *rtask);
             }
             return NULL;
         }
@@ -859,7 +859,7 @@ namespace internal {
                         rtask = new ( task::allocate_additional_child_of( *(this->my_root_task) ) )
                             forward_task_bypass<my_node_type>(*my_node);
                         if(handle_task) {
-                            task::enqueue(*rtask);
+                            FLOW_SPAWN(*rtask);
                             rtask = NULL;
                         }
                         do_fwd = false;
@@ -1058,7 +1058,7 @@ namespace internal {
                         task *rtask = new ( task::allocate_additional_child_of(*(this->my_root_task)) )
                                 forward_task_bypass
                                 <join_node_base<JP,InputTuple,OutputTuple> >(*this);
-                        task::enqueue(*rtask);
+                        FLOW_SPAWN(*rtask);
                         forwarder_busy = true;
                     }
                     __TBB_store_with_release(current->status, SUCCEEDED);
diff --git a/include/tbb/internal/_flow_graph_node_impl.h b/include/tbb/internal/_flow_graph_node_impl.h
index 4467ebf..2874d58 100644
--- a/include/tbb/internal/_flow_graph_node_impl.h
+++ b/include/tbb/internal/_flow_graph_node_impl.h
@@ -271,7 +271,7 @@ namespace internal {
             task *new_task = apply_body_bypass(i);
             if(!new_task) return;
             if(new_task == SUCCESSFULLY_ENQUEUED) return;
-            task::enqueue(*new_task);
+            FLOW_SPAWN(*new_task);
             return;
         }
         
@@ -296,7 +296,7 @@ namespace internal {
 
        //! Spawns a task that calls apply_body( input )
        inline void spawn_body_task( const input_type &input ) {
-           task::enqueue(*create_body_task(input));
+           FLOW_SPAWN(*create_body_task(input));
        }
         
        //! This is executed by an enqueued task, the "forwarder"
@@ -321,7 +321,7 @@ namespace internal {
 
        //! Spawns a task that calls forward()
        inline void spawn_forward_task() {
-           task::enqueue(*create_forward_task());
+           FLOW_SPAWN(*create_forward_task());
        }
     };  // function_input_base
 
@@ -569,7 +569,7 @@ namespace internal {
         bool try_put(const output_type &i) {
             task *res = my_successors.try_put_task(i);
             if(!res) return false;
-            if(res != SUCCESSFULLY_ENQUEUED) task::enqueue(*res);
+            if(res != SUCCESSFULLY_ENQUEUED) FLOW_SPAWN(*res);
             return true;
         }
     };
diff --git a/include/tbb/internal/_mutex_padding.h b/include/tbb/internal/_mutex_padding.h
new file mode 100644
index 0000000..6130dab
--- /dev/null
+++ b/include/tbb/internal/_mutex_padding.h
@@ -0,0 +1,76 @@
+/*
+    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_mutex_padding_H
+#define __TBB_mutex_padding_H
+
+// Wrapper that pads a mutex so it sits alone on a cache line, without requiring that it be allocated
+// from a pool.  Because padded mutexes may be defined anywhere, they must span at least two cache lines.
+
+namespace tbb {
+namespace interface7 {
+namespace internal {
+
+static const size_t cache_line_size = 64; // in bytes; used in the sizeof/address arithmetic below
+
+// Pad a mutex to occupy a number of full cache lines sufficient to avoid false sharing
+// with other data; space overhead is up to 2*cache_line_size-1.
+template<typename Mutex>
+class padded_mutex { // NOTE(review): implicitly copyable; a copy duplicates my_pad bytewise and never copy-constructs Mutex
+    typedef long pad_type; // element type of the raw padding storage
+    pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)]; // sizeof(Mutex) rounded up to whole cache lines, plus one extra line
+
+    Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);} // first cache_line_size boundary strictly above 'this'
+
+public:
+    static const bool is_rw_mutex = Mutex::is_rw_mutex; // the three traits are forwarded from the wrapped Mutex
+    static const bool is_recursive_mutex = Mutex::is_recursive_mutex;
+    static const bool is_fair_mutex = Mutex::is_fair_mutex;
+
+    padded_mutex() { new(impl()) Mutex(); } // placement-constructs the Mutex at the address computed by impl()
+    ~padded_mutex() { impl()->~Mutex(); } // explicit destructor call matching the placement new above
+
+    //! Represents acquisition of a mutex; wraps Mutex::scoped_lock and passes it *m.impl() instead of m.
+    class scoped_lock :  tbb::internal::no_copy {
+        typename Mutex::scoped_lock my_scoped_lock; // the wrapped lock object that does the actual work
+    public:
+        scoped_lock() : my_scoped_lock() {}
+        scoped_lock( padded_mutex& m ) : my_scoped_lock(*m.impl()) { }
+        ~scoped_lock() {  } // nothing to do here; my_scoped_lock is destroyed as a member
+
+        void acquire( padded_mutex& m ) { my_scoped_lock.acquire(*m.impl()); }
+        bool try_acquire( padded_mutex& m ) { return my_scoped_lock.try_acquire(*m.impl()); }
+        void release() { my_scoped_lock.release(); }
+    };
+};
+
+} // namespace internal
+} // namespace interface7
+} // namespace tbb
+
+#endif /* __TBB_mutex_padding_H */
diff --git a/include/tbb/internal/_x86_eliding_mutex_impl.h b/include/tbb/internal/_x86_eliding_mutex_impl.h
new file mode 100644
index 0000000..43118df
--- /dev/null
+++ b/include/tbb/internal/_x86_eliding_mutex_impl.h
@@ -0,0 +1,157 @@
+/*
+    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB__x86_eliding_mutex_impl_H
+#define __TBB__x86_eliding_mutex_impl_H
+
+#ifndef __TBB_spin_mutex_H
+#error Do not #include this internal file directly; use public TBB headers instead.
+#endif
+
+#if ( __TBB_x86_32 || __TBB_x86_64 )
+
+namespace tbb {
+namespace interface7 {
+namespace internal {
+
+template<typename Mutex>
+class padded_mutex;
+
+//! An eliding lock that occupies a single byte.
+/** An x86_eliding_mutex is an HLE-enabled spin mutex. It is recommended to
+    put the mutex on a cache line that is not shared by the data it protects.
+    It should be used for locking short critical sections where the lock is
+    contended but the data it protects are not.  If zero-initialized, the
+    mutex is considered unheld.
+    @ingroup synchronization */
+class x86_eliding_mutex {
+
+    //! 0 if lock is released, 1 if lock is acquired.
+    __TBB_atomic_flag flag;
+
+    friend class padded_mutex<x86_eliding_mutex>; // padded_mutex wraps scoped_lock, which may be private (see below)
+
+public:
+    //! Construct unacquired lock.
+    /** Equivalent to zero-initialization of *this. */
+    x86_eliding_mutex() : flag(0) {}
+
+// bug in gcc 3.x.x causes syntax error in spite of the friend declaration above.
+// Make the scoped_lock public in that case.
+#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
+#else
+    // by default we will not provide the scoped_lock interface.  The user
+    // should use the padded version of the mutex.  scoped_lock is used in
+    // padded_mutex template.
+private:
+#endif
+    // scoped_lock in padded_mutex<> is the interface to use.
+    //! Represents acquisition of a mutex.
+    class scoped_lock : tbb::internal::no_copy {
+    private:
+        //! Points to currently held mutex, or NULL if no lock is held.
+        x86_eliding_mutex* my_mutex;
+
+    public:
+        //! Construct without acquiring a mutex.
+        scoped_lock() : my_mutex(NULL) {}
+
+        //! Construct and acquire lock on a mutex.
+        scoped_lock( x86_eliding_mutex& m ) : my_mutex(NULL) { acquire(m); }
+
+        //! Acquire lock.
+        void acquire( x86_eliding_mutex& m ) {
+            __TBB_ASSERT( !my_mutex, "already holding a lock" );
+
+            my_mutex=&m; // the pointer is recorded before lock() is called
+            my_mutex->lock();
+        }
+
+        //! Try acquiring lock (non-blocking)
+        /** Return true if lock acquired; false otherwise. */
+        bool try_acquire( x86_eliding_mutex& m ) {
+            __TBB_ASSERT( !my_mutex, "already holding a lock" );
+
+            bool result = m.try_lock();
+            if( result ) { // my_mutex is set only when try_lock() succeeded
+                my_mutex = &m;
+            }
+            return result;
+        }
+
+        //! Release lock
+        void release() {
+            __TBB_ASSERT( my_mutex, "release on scoped_lock that is not holding a lock" );
+
+            my_mutex->unlock();
+            my_mutex = NULL; // mark this scoped_lock as no longer holding anything
+        }
+
+        //! Destroy lock.  If holding a lock, releases the lock first.
+        ~scoped_lock() {
+            if( my_mutex ) {
+                release();
+            }
+        }
+    };
+#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
+#else
+public: // restore public access after the private scoped_lock
+#endif  /* __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000 */
+
+    // Mutex traits
+    static const bool is_rw_mutex = false;
+    static const bool is_recursive_mutex = false;
+    static const bool is_fair_mutex = false;
+
+    // ISO C++0x compatibility methods
+
+    //! Acquire lock
+    void lock() {
+        __TBB_LockByteElided(flag); // defined outside this header
+    }
+
+    //! Try acquiring lock (non-blocking)
+    /** Return true if lock acquired; false otherwise. */
+    bool try_lock() {
+        return __TBB_TryLockByteElided(flag); // defined outside this header
+    }
+
+    //! Release lock
+    void unlock() {
+        __TBB_UnlockByteElided( flag ); // defined outside this header
+    }
+}; // end of x86_eliding_mutex
+
+} // namespace internal
+} // namespace interface7
+} // namespace tbb
+
+#endif /* ( __TBB_x86_32 || __TBB_x86_64 ) */
+
+#endif /* __TBB__x86_eliding_mutex_impl_H */
diff --git a/include/tbb/machine/gcc_generic.h b/include/tbb/machine/gcc_generic.h
index b1ffc57..655c2e6 100644
--- a/include/tbb/machine/gcc_generic.h
+++ b/include/tbb/machine/gcc_generic.h
@@ -133,3 +133,7 @@ inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) {
 #if __TBB_WORDSIZE==4
     #define __TBB_USE_GENERIC_DWORD_LOAD_STORE              1
 #endif
+
+#if __TBB_x86_32 || __TBB_x86_64
+#include "gcc_itsx.h"
+#endif
diff --git a/include/tbb/machine/gcc_ia32_common.h b/include/tbb/machine/gcc_ia32_common.h
index 6127045..1128b6e 100644
--- a/include/tbb/machine/gcc_ia32_common.h
+++ b/include/tbb/machine/gcc_ia32_common.h
@@ -94,4 +94,6 @@ inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) {
 }
 #endif /* !__TBB_CPU_CTL_ENV_PRESENT */
 
+#include "gcc_itsx.h"
+
 #endif /* __TBB_machine_gcc_ia32_common_H */
diff --git a/include/tbb/machine/mic_common.h b/include/tbb/machine/gcc_itsx.h
similarity index 54%
copy from include/tbb/machine/mic_common.h
copy to include/tbb/machine/gcc_itsx.h
index ca423d8..92f5d68 100644
--- a/include/tbb/machine/mic_common.h
+++ b/include/tbb/machine/gcc_itsx.h
@@ -26,43 +26,41 @@
     the GNU General Public License.
 */
 
-#ifndef __TBB_mic_common_H
-#define __TBB_mic_common_H
-
-#ifndef __TBB_machine_H
+#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_itsx_H)
 #error Do not #include this internal file directly; use public TBB headers instead.
 #endif
 
-#if ! __TBB_DEFINE_MIC
-    #error mic_common.h should be included only when building for Intel(R) Many Integrated Core Architecture
-#endif
+#define __TBB_machine_gcc_itsx_H
 
-#ifndef __TBB_PREFETCHING
-#define __TBB_PREFETCHING 1
-#endif
-#if __TBB_PREFETCHING
-#include <immintrin.h>
-#define __TBB_cl_prefetch(p) _mm_prefetch((const char*)p, _MM_HINT_T1)
-#define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1)
-#endif
+#define __TBB_OP_XACQUIRE 0xF2
+#define __TBB_OP_XRELEASE 0xF3
+#define __TBB_OP_LOCK     0xF0
 
-/** Early Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
-#define __TBB_full_memory_fence __TBB_release_consistency_helper
-#define __TBB_Pause(x) _mm_delay_32(16*(x))
-#define __TBB_STEALING_PAUSE 1500/16
-#include <sched.h>
-#define __TBB_Yield() sched_yield()
+#define __TBB_STRINGIZE_INTERNAL(arg) #arg
+#define __TBB_STRINGIZE(arg) __TBB_STRINGIZE_INTERNAL(arg)
 
-/** FPU control setting **/
-#define __TBB_CPU_CTL_ENV_PRESENT 0
+#ifdef __TBB_x86_64
+#define __TBB_r_out "=r"
+#else
+#define __TBB_r_out "=q"
+#endif
 
-/** Specifics **/
-#define __TBB_STEALING_ABORT_ON_CONTENTION 1
-#define __TBB_YIELD2P 1
-#define __TBB_HOARD_NONLOCAL_TASKS 1
+inline static uint8_t __TBB_machine_try_lock_elided( volatile uint8_t* lk )
+{ // exchanges 1 into *lk using 'lock xchgb' preceded by the XACQUIRE prefix byte (0xF2)
+    uint8_t value = 1; // swapped with *lk; afterwards holds the previous lock byte
+    __asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XACQUIRE)"; lock; xchgb %0, %1;"
+                      : __TBB_r_out(value), "=m"(*lk)  : "0"(value), "m"(*lk) : "memory" );
+    return uint8_t(value^1); // 1 when the previous byte was 0, 0 when it was 1
+}
 
-#if ! ( __FreeBSD__ || __linux__ )
-    #error Intel(R) Many Integrated Core Compiler does not define __FreeBSD__ or __linux__ anymore. Check for the __TBB_XXX_BROKEN defined under __FreeBSD__ or __linux__.
-#endif /* ! ( __FreeBSD__ || __linux__ ) */
+inline static void __TBB_machine_try_lock_elided_cancel()
+{
+    // Issues 'pause', which aborts HLE/RTM transactions; the "memory" clobber also acts as a compiler barrier
+    __asm__ volatile ("pause\n" : : : "memory" );
+}
 
-#endif /* __TBB_mic_common_H */
+inline static void __TBB_machine_unlock_elided( volatile uint8_t* lk )
+{ // stores 0 to *lk with a 'movb' preceded by the XRELEASE prefix byte (0xF3)
+    __asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XRELEASE)"; movb $0, %0" 
+                      : "=m"(*lk) : "m"(*lk) : "memory" );
+}
diff --git a/include/tbb/machine/icc_generic.h b/include/tbb/machine/icc_generic.h
index a0ae2c1..6a76393 100644
--- a/include/tbb/machine/icc_generic.h
+++ b/include/tbb/machine/icc_generic.h
@@ -31,7 +31,7 @@
 #endif
 
 #if ! __TBB_ICC_BUILTIN_ATOMICS_PRESENT
-    #error "Intel C++ Compiler of at least 12.1 version is needed to use ICC intrinsics port"
+    #error "Intel C++ Compiler of at least 12.0 version is needed to use ICC intrinsics port"
 #endif
 
 #define __TBB_machine_icc_generic_H
@@ -256,3 +256,4 @@ template <typename T>
 inline void __TBB_machine_AND( T *operand, T addend ) {
     __atomic_fetch_and_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
 }
+
diff --git a/include/tbb/machine/linux_intel64.h b/include/tbb/machine/linux_intel64.h
index 85c948b..92d617c 100644
--- a/include/tbb/machine/linux_intel64.h
+++ b/include/tbb/machine/linux_intel64.h
@@ -101,3 +101,4 @@ static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE            1
 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE                1
 #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
+
diff --git a/include/tbb/machine/mic_common.h b/include/tbb/machine/mic_common.h
index ca423d8..cdaa672 100644
--- a/include/tbb/machine/mic_common.h
+++ b/include/tbb/machine/mic_common.h
@@ -46,8 +46,8 @@
 #define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1)
 #endif
 
-/** Early Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
-#define __TBB_full_memory_fence __TBB_release_consistency_helper
+/** Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
+#define __TBB_full_memory_fence() __asm__ __volatile__("lock; addl $0,(%%rsp)":::"memory")
 #define __TBB_Pause(x) _mm_delay_32(16*(x))
 #define __TBB_STEALING_PAUSE 1500/16
 #include <sched.h>
diff --git a/include/tbb/machine/msvc_ia32_common.h b/include/tbb/machine/msvc_ia32_common.h
index b2e6cb5..5f15e87 100644
--- a/include/tbb/machine/msvc_ia32_common.h
+++ b/include/tbb/machine/msvc_ia32_common.h
@@ -45,7 +45,6 @@
     #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
 #endif
 
-
 #define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
 
 #if (_MSC_VER >= 1300) || (__INTEL_COMPILER) //Use compiler intrinsic when available
@@ -181,4 +180,16 @@ extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
 
 #undef __TBB_r
 
+extern "C" {
+    __int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr); // declaration only; no definition in this header
+    void   __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);   // declaration only; no definition in this header
+
+    // Both variants below issue 'pause'; the 'pause' instruction aborts HLE/RTM transactions
+#if __TBB_PAUSE_USE_INTRINSIC
+    inline static void __TBB_machine_try_lock_elided_cancel() { _mm_pause(); }
+#else
+    inline static void __TBB_machine_try_lock_elided_cancel() { _asm pause; }
+#endif
+}
+
 #endif /* __TBB_machine_msvc_ia32_common_H */
diff --git a/include/tbb/memory_pool.h b/include/tbb/memory_pool.h
index 8205936..cf907d8 100644
--- a/include/tbb/memory_pool.h
+++ b/include/tbb/memory_pool.h
@@ -264,6 +264,8 @@ inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(
 }
 inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) {
     fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id);
+    // TODO: we can implement the "buffer for fixed pools is used only once" policy
+    // on the low-level side, thus eliminating atomics here
     if( !tbb::internal::as_atomic(self.my_size).compare_and_swap(0, (bytes=self.my_size)) )
         return 0; // all the memory was given already
     return self.my_buffer;
diff --git a/include/tbb/parallel_reduce.h b/include/tbb/parallel_reduce.h
index fbe1eef..4eacdf2 100644
--- a/include/tbb/parallel_reduce.h
+++ b/include/tbb/parallel_reduce.h
@@ -66,12 +66,16 @@ namespace internal {
             my_body(NULL)
         {
         }
+        ~finish_reduce() { // destruction of the zombie Body is done here rather than in execute()
+            if( has_right_zombie ) // only then is there a Body in zombie_space to destroy
+                zombie_space.begin()->~Body(); // explicit destructor call on the Body held in zombie_space
+        }
         task* execute() {
             if( has_right_zombie ) {
                 // Right child was stolen.
                 Body* s = zombie_space.begin();
                 my_body->join( *s );
-                s->~Body();
+                // Body::join() won't be called if canceled. Defer destruction to destructor
             }
             if( my_context==left_child )
                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
diff --git a/include/tbb/scalable_allocator.h b/include/tbb/scalable_allocator.h
index 10c4604..7f426a6 100644
--- a/include/tbb/scalable_allocator.h
+++ b/include/tbb/scalable_allocator.h
@@ -88,16 +88,41 @@ void __TBB_EXPORTED_FUNC scalable_aligned_free (void* ptr);
     @ingroup memory_allocation */
 size_t __TBB_EXPORTED_FUNC scalable_msize (void* ptr);
 
+/* Result codes for the scalable_allocation_* functions; MemPoolError below reuses these values. */
+typedef enum {
+    TBBMALLOC_OK,            /* first enumerator, so its value is 0 */
+    TBBMALLOC_INVALID_PARAM,
+    TBBMALLOC_UNSUPPORTED,
+    TBBMALLOC_NO_MEMORY,
+    TBBMALLOC_NO_EFFECT      /* e.g. a clean-buffers command that cleaned no buffers */
+} ScalableAllocationResult;
+
 /* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages.
    scalable_allocation_mode call has priority over environment variable. */
-enum AllocationModeParam {
-    USE_HUGE_PAGES /* value turns using huge pages on and off */
-};
+typedef enum {
+    TBBMALLOC_USE_HUGE_PAGES,  /* the value argument turns the use of huge pages on or off */
+    /* deprecated alias of TBBMALLOC_USE_HUGE_PAGES, kept for backward compatibility only */
+    USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES
+} AllocationModeParam;
 
 /** Set TBB allocator-specific allocation modes.
     @ingroup memory_allocation */
 int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value);
 
+typedef enum {
+    /* Clean internal allocator buffers for all threads.
+       Returns TBBMALLOC_NO_EFFECT if no buffers were cleaned,
+       TBBMALLOC_OK if some memory was released from buffers. */
+    TBBMALLOC_CLEAN_ALL_BUFFERS,
+    /* Clean internal allocator buffer for the current thread only.
+       Return values are the same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */
+    TBBMALLOC_CLEAN_THREAD_BUFFERS
+} ScalableAllocationCmd;
+
+/** Call TBB allocator-specific commands.
+    @ingroup memory_allocation */
+int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
@@ -145,11 +170,19 @@ struct MemPoolPolicy {
         reserved(0) {}
 };
 
+// enumerators have the same values as the corresponding enumerators of ScalableAllocationResult
+// TODO: use ScalableAllocationResult in pool_create directly
 enum MemPoolError {
-    POOL_OK,            // pool created successfully
-    INVALID_POLICY,     // invalid policy parameters found
-    UNSUPPORTED_POLICY, // requested pool policy is not supported by allocator library
-    NO_MEMORY           // lack of memory during pool creation
+    // pool created successfully
+    POOL_OK = TBBMALLOC_OK,
+    // invalid policy parameters found
+    INVALID_POLICY = TBBMALLOC_INVALID_PARAM,
+    // requested pool policy is not supported by allocator library
+    UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED,
+    // lack of memory during pool creation
+    NO_MEMORY = TBBMALLOC_NO_MEMORY,
+    // action takes no effect
+    NO_EFFECT = TBBMALLOC_NO_EFFECT
 };
 
 MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
diff --git a/include/tbb/spin_mutex.h b/include/tbb/spin_mutex.h
index f8b8d51..f4bb7cb 100644
--- a/include/tbb/spin_mutex.h
+++ b/include/tbb/spin_mutex.h
@@ -35,13 +35,14 @@
 #include "tbb_stddef.h"
 #include "tbb_machine.h"
 #include "tbb_profiling.h"
+#include "internal/_mutex_padding.h"
 
 namespace tbb {
 
 //! A lock that occupies a single byte.
-/** A spin_mutex is a spin mutex that fits in a single byte.  
-    It should be used only for locking short critical sections 
-    (typically less than 20 instructions) when fairness is not an issue.  
+/** A spin_mutex is a spin mutex that fits in a single byte.
+    It should be used only for locking short critical sections
+    (typically less than 20 instructions) when fairness is not an issue.
     If zero-initialized, the mutex is considered unheld.
     @ingroup synchronization */
 class spin_mutex {
@@ -61,11 +62,11 @@ public:
     class scoped_lock : internal::no_copy {
     private:
         //! Points to currently held mutex, or NULL if no lock is held.
-        spin_mutex* my_mutex; 
+        spin_mutex* my_mutex;
 
-        //! Value to store into spin_mutex::flag to unlock the mutex. 
-        /** This variable is no longer used. Instead, 0 and 1 are used to 
-            represent that the lock is free and acquired, respectively. 
+        //! Value to store into spin_mutex::flag to unlock the mutex.
+        /** This variable is no longer used. Instead, 0 and 1 are used to
+            represent that the lock is free and acquired, respectively.
             We keep the member variable here to ensure backward compatibility */
         __TBB_Flag my_unlock_value;
 
@@ -85,14 +86,14 @@ public:
         scoped_lock() : my_mutex(NULL), my_unlock_value(0) {}
 
         //! Construct and acquire lock on a mutex.
-        scoped_lock( spin_mutex& m ) : my_unlock_value(0) { 
+        scoped_lock( spin_mutex& m ) : my_unlock_value(0) {
             internal::suppress_unused_warning(my_unlock_value);
 #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
             my_mutex=NULL;
             internal_acquire(m);
 #else
-            __TBB_LockByte(m.flag);
             my_mutex=&m;
+            __TBB_LockByte(m.flag);
 #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
         }
 
@@ -101,8 +102,8 @@ public:
 #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
             internal_acquire(m);
 #else
-            __TBB_LockByte(m.flag);
             my_mutex = &m;
+            __TBB_LockByte(m.flag);
 #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
         }
 
@@ -141,6 +142,7 @@ public:
         }
     };
 
+    //! Internal constructor with ITT instrumentation.
     void __TBB_EXPORTED_METHOD internal_construct();
 
     // Mutex traits
@@ -184,10 +186,35 @@ public:
     }
 
     friend class scoped_lock;
-};
+}; // end of spin_mutex
 
 __TBB_DEFINE_PROFILING_SET_NAME(spin_mutex)
 
 } // namespace tbb
 
+#if ( __TBB_x86_32 || __TBB_x86_64 )
+#include "internal/_x86_eliding_mutex_impl.h"
+#endif
+
+namespace tbb {
+//! A cross-platform spin mutex with speculative lock acquisition.
+/** On platforms with proper HW support, this lock may speculatively execute
+    its critical sections, using HW mechanisms to detect real data races and
+    ensure atomicity of the critical sections. In particular, it uses
+    Intel(R) Transactional Synchronization Extensions (Intel(R) TSX).
+    Without such HW support, it behaves like a spin_mutex.
+    It should be used for locking short critical sections where the lock is
+    contended but the data it protects are not.  If zero-initialized, the
+    mutex is considered unheld.
+    @ingroup synchronization */
+
+#if ( __TBB_x86_32 || __TBB_x86_64 )
+typedef interface7::internal::padded_mutex<interface7::internal::x86_eliding_mutex> speculative_spin_mutex; // x86: padded x86_eliding_mutex
+#else
+typedef interface7::internal::padded_mutex<spin_mutex> speculative_spin_mutex; // other targets: padded plain spin_mutex
+#endif
+__TBB_DEFINE_PROFILING_SET_NAME(speculative_spin_mutex)
+
+} // namespace tbb
+
 #endif /* __TBB_spin_mutex_H */
diff --git a/include/tbb/task.h b/include/tbb/task.h
index dc233ab..f286c12 100644
--- a/include/tbb/task.h
+++ b/include/tbb/task.h
@@ -386,7 +386,7 @@ private:
     //! Scheduler instance that registered this context in its thread specific list.
     internal::generic_scheduler *my_owner;
 
-    //! Internal state (combination of state flags).
+    //! Internal state (combination of state flags, currently only may_have_children).
     uintptr_t my_state;
 
 #if __TBB_TASK_PRIORITY
@@ -440,6 +440,7 @@ public:
         init();
     }
 
+    // Do not introduce standalone unbind method since it will break state propagation assumptions
     __TBB_EXPORTED_METHOD ~task_group_context ();
 
     //! Forcefully reinitializes the context after the task tree it was associated with is completed.
@@ -496,11 +497,9 @@ private:
     static const kind_type detached = kind_type(binding_completed+1);
     static const kind_type dying = kind_type(detached+1);
 
-    //! Propagates state change (if any) from an ancestor
-    /** Checks if one of this object's ancestors is in a new state, and propagates
-        the new state to all its descendants in this object's heritage line. **/
+    //! Propagates any state change detected to *this, and as an optimisation possibly also upward along the heritage line.
     template <typename T>
-    void propagate_state_from_ancestors ( T task_group_context::*mptr_state, T new_state );
+    void propagate_task_group_state ( T task_group_context::*mptr_state, task_group_context& src, T new_state );
 
     //! Makes sure that the context is registered with a scheduler instance.
     inline void finish_initialization ( internal::generic_scheduler *local_sched );
diff --git a/include/tbb/tbb_config.h b/include/tbb/tbb_config.h
index 769ba5a..07a25df 100644
--- a/include/tbb/tbb_config.h
+++ b/include/tbb/tbb_config.h
@@ -37,6 +37,12 @@
     - known compiler/platform issues
 **/
 
+/*Check which standard library we use on OS X.*/
+/*__TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed.*/
+#if !defined(__TBB_SYMBOL) && __APPLE__
+    #include <cstddef>
+#endif
+
 #define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
 
 #if __clang__
@@ -52,7 +58,7 @@
     #define __INTEL_COMPILER 1210
 #endif
 
-#if (__TBB_GCC_VERSION >= 40400) && !defined(__INTEL_COMPILER)
+#if __TBB_GCC_VERSION >= 40400 && !defined(__INTEL_COMPILER)
     /** warning suppression pragmas available in GCC since 4.4 **/
     #define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1
 #endif
@@ -69,41 +75,48 @@
  */
 
 #if __INTEL_COMPILER
-    /** On Windows environment when using Intel C++ compiler with Visual Studio 2010*,
-        the C++0x features supported by Visual C++ 2010 are enabled by default
-        TODO: find a way to get know if c++0x mode is specified in command line on windows **/
-    #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT    ( __VARIADIC_TEMPLATES && (__GXX_EXPERIMENTAL_CXX0X__ || _MSC_VER) )
-    #define __TBB_CPP11_RVALUE_REF_PRESENT            ( (__GXX_EXPERIMENTAL_CXX0X__ || _MSC_VER >= 1600) && (__INTEL_COMPILER >= 1200) )
+    /** C++11 mode detection macros for Intel C++ compiler (enabled by -std=c++0x option):
+          __INTEL_CXX11_MODE__ for version >=13.0
+          __STDC_HOSTED__ for version >=12.0 on Windows,
+          __GXX_EXPERIMENTAL_CXX0X__ for version >=12.0 on Linux and OS X. **/
+    //  On Windows, C++11 features supported by Visual Studio 2010 and higher are enabled by default
+    #ifndef __INTEL_CXX11_MODE__
+        #define __INTEL_CXX11_MODE__ ((_MSC_VER && __STDC_HOSTED__) || __GXX_EXPERIMENTAL_CXX0X__)
+        // TODO: check if more conditions can be simplified with the above macro
+    #endif
+    #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT    (__INTEL_CXX11_MODE__ && __VARIADIC_TEMPLATES)
+    #define __TBB_CPP11_RVALUE_REF_PRESENT            ((__GXX_EXPERIMENTAL_CXX0X__ || _MSC_VER >= 1600) && __INTEL_COMPILER >= 1200)
     #if  _MSC_VER >= 1600
         #define __TBB_EXCEPTION_PTR_PRESENT           ( __INTEL_COMPILER > 1300                                                \
                                                       /*ICC 12.1 Upd 10 and 13 beta Upd 2 fixed exception_ptr linking  issue*/ \
                                                       || (__INTEL_COMPILER == 1300 && __INTEL_COMPILER_BUILD_DATE >= 20120530) \
                                                       || (__INTEL_COMPILER == 1210 && __INTEL_COMPILER_BUILD_DATE >= 20120410) )
-    /** libstc++ that comes with GCC 4.6 use C++11 features not supported by ICC 12.1.
-     * Because of that ICC 12.1 does not support C++11 mode with with gcc 4.6. (or higher)
-     * , and therefore does not  define __GXX_EXPERIMENTAL_CXX0X__ macro**/
-    #elif (__TBB_GCC_VERSION >= 40404) && (__TBB_GCC_VERSION < 40600)
-        #define __TBB_EXCEPTION_PTR_PRESENT           ( __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1200 )
-    #elif (__TBB_GCC_VERSION >= 40600)
-        #define __TBB_EXCEPTION_PTR_PRESENT           ( __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1300 )
+    /** libstdc++ that comes with GCC 4.6 uses C++11 features not supported by ICC 12.1.
+     *  Because of that, ICC 12.1 does not support C++11 mode with GCC 4.6 (or higher),
+     *  and therefore does not define the __GXX_EXPERIMENTAL_CXX0X__ macro. **/
+    #elif __TBB_GCC_VERSION >= 40404 && __TBB_GCC_VERSION < 40600
+        #define __TBB_EXCEPTION_PTR_PRESENT           (__GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1200)
+    #elif __TBB_GCC_VERSION >= 40600
+        #define __TBB_EXCEPTION_PTR_PRESENT           (__GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1300)
     #else
         #define __TBB_EXCEPTION_PTR_PRESENT           0
     #endif
     #define __TBB_MAKE_EXCEPTION_PTR_PRESENT          (_MSC_VER >= 1700 || (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40600))
-    #define __TBB_STATIC_ASSERT_PRESENT               ( __GXX_EXPERIMENTAL_CXX0X__ || (_MSC_VER >= 1600) )
-    #define __TBB_CPP11_TUPLE_PRESENT                 ( (_MSC_VER >= 1600) || ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40300)) )
+    #define __TBB_STATIC_ASSERT_PRESENT               (__INTEL_CXX11_MODE__ || _MSC_VER >= 1600)
+    #define __TBB_CPP11_TUPLE_PRESENT                 (_MSC_VER >= 1600 || (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300))
     /** TODO: re-check for compiler version greater than 12.1 if it supports initializer lists**/
     #define __TBB_INITIALIZER_LISTS_PRESENT           0
     #define __TBB_CONSTEXPR_PRESENT                   0
-    #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT  0
+    #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT  __INTEL_CXX11_MODE__ 
 #elif __clang__
 //TODO: these options need to be rechecked
 /** on OS X* the only way to get C++11 is to use clang. For library features (e.g. exception_ptr) libc++ is also
  *  required. So there is no need to check GCC version for clang**/
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT     __has_feature(__cxx_variadic_templates__)
     #define __TBB_CPP11_RVALUE_REF_PRESENT             __has_feature(__cxx_rvalue_references__)
-    #define __TBB_EXCEPTION_PTR_PRESENT               (__GXX_EXPERIMENTAL_CXX0X__ && (__cplusplus >= 201103L))
-    #define __TBB_MAKE_EXCEPTION_PTR_PRESENT          (__GXX_EXPERIMENTAL_CXX0X__ && (__cplusplus >= 201103L))
+/** TODO: extend exception_ptr related conditions to cover libstdc++ **/
+    #define __TBB_EXCEPTION_PTR_PRESENT               (__cplusplus >= 201103L && _LIBCPP_VERSION)
+    #define __TBB_MAKE_EXCEPTION_PTR_PRESENT          (__cplusplus >= 201103L && _LIBCPP_VERSION)
     #define __TBB_STATIC_ASSERT_PRESENT               __has_feature(__cxx_static_assert__)
     /**Clang (preprocessor) has problems with dealing with expression having __has_include in #if's
      * used inside C++ code. (At least version that comes with OS X 10.8) **/
@@ -121,17 +134,17 @@
     /** __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 here is a substitution for _GLIBCXX_ATOMIC_BUILTINS_4, which is a prerequisite 
         for exception_ptr but cannot be used in this file because it is defined in a header, not by the compiler. 
         If the compiler has no atomic intrinsics, the C++ library should not expect those as well. **/
-    #define __TBB_EXCEPTION_PTR_PRESENT               ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40404) && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
-    #define __TBB_MAKE_EXCEPTION_PTR_PRESENT          ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40600))
-    #define __TBB_STATIC_ASSERT_PRESENT               ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40300))
-    #define __TBB_CPP11_TUPLE_PRESENT                 ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40300))
-    #define __TBB_INITIALIZER_LISTS_PRESENT           ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40400))
+    #define __TBB_EXCEPTION_PTR_PRESENT               (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40404 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
+    #define __TBB_MAKE_EXCEPTION_PTR_PRESENT          (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40600)
+    #define __TBB_STATIC_ASSERT_PRESENT               (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300)
+    #define __TBB_CPP11_TUPLE_PRESENT                 (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300)
+    #define __TBB_INITIALIZER_LISTS_PRESENT           (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
     /** gcc seems have to support constexpr from 4.4 but tests in (test_atomic) seeming reasonable fail to compile prior 4.6**/
-    #define __TBB_CONSTEXPR_PRESENT                   ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40400))
-    #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT  ((__GXX_EXPERIMENTAL_CXX0X__) && (__TBB_GCC_VERSION >= 40400))
+    #define __TBB_CONSTEXPR_PRESENT                   (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
+    #define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT  (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
 #elif _MSC_VER
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT    0
-    #define __TBB_CPP11_RVALUE_REF_PRESENT            0
+    #define __TBB_CPP11_RVALUE_REF_PRESENT            (_MSC_VER >= 1600)
     #define __TBB_EXCEPTION_PTR_PRESENT               (_MSC_VER >= 1600)
     #define __TBB_STATIC_ASSERT_PRESENT               (_MSC_VER >= 1600)
     #define __TBB_MAKE_EXCEPTION_PTR_PRESENT          (_MSC_VER >= 1700)
@@ -172,13 +185,13 @@
 /* Actually ICC supports gcc __sync_* intrinsics starting 11.1,
  * but 64 bit support for 32 bit target comes in later ones*/
 /* TODO: change the version back to 4.1.2 once macro __TBB_WORD_SIZE become optional */
-#if (__TBB_GCC_VERSION >= 40306) || (__INTEL_COMPILER >= 1200)
+#if __TBB_GCC_VERSION >= 40306 || __INTEL_COMPILER >= 1200
     /** built-in atomics available in GCC since 4.1.2 **/
     #define __TBB_GCC_BUILTIN_ATOMICS_PRESENT 1
 #endif
 
-#if (__INTEL_COMPILER >= 1210)
-    /** built-in C++11 style atomics available in compiler since 12.1 **/
+#if __INTEL_COMPILER >= 1200
+    /** built-in C++11 style atomics available in ICC since 12.0 **/
     #define __TBB_ICC_BUILTIN_ATOMICS_PRESENT 1
 #endif
 
@@ -259,9 +272,8 @@
 
 /* TBB_USE_CAPTURED_EXCEPTION should be explicitly set to either 0 or 1, as it is used as C++ const */
 #ifndef TBB_USE_CAPTURED_EXCEPTION
-    /**TODO: enable it by default on OS X*, once it is enabled in pre-built binary **/
-    /** OS X* and IA64 pre-built TBB binaries do not support exception_ptr. **/
-    #if __TBB_EXCEPTION_PTR_PRESENT && !defined(__APPLE__) && !defined(__ia64__)
+    /** IA-64 architecture pre-built TBB binaries do not support exception_ptr. **/
+    #if __TBB_EXCEPTION_PTR_PRESENT && !defined(__ia64__)
         #define TBB_USE_CAPTURED_EXCEPTION 0
     #else
         #define TBB_USE_CAPTURED_EXCEPTION 1
@@ -273,14 +285,16 @@
 #endif /* defined TBB_USE_CAPTURED_EXCEPTION */
 
 /** Check whether the request to use GCC atomics can be satisfied **/
-#if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT)
+#if TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT
     #error "GCC atomic built-ins are not supported."
 #endif
 
 /** Internal TBB features & modes **/
 
 /** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/
-#define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && ((__TBB_GCC_VERSION >= 40000) || __INTEL_COMPILER ) )
+#ifndef __TBB_WEAK_SYMBOLS_PRESENT
+#define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) )
+#endif
 
 /** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/
 #ifndef __TBB_DYNAMIC_LOAD_ENABLED
@@ -315,7 +329,9 @@
     #define TBB_PREVIEW_LOCAL_OBSERVER 1
     #define __TBB_NO_IMPLICIT_LINKAGE 1
     #define __TBB_RECYCLE_TO_ENQUEUE 1
-    #define __TBB_TASK_PRIORITY 0 // TODO: it will be removed in next versions
+    #ifndef __TBB_TASK_PRIORITY
+        #define __TBB_TASK_PRIORITY 0 // TODO: it will be removed in next versions
+    #endif
     #if !__TBB_SCHEDULER_OBSERVER
         #error TBB_PREVIEW_TASK_ARENA requires __TBB_SCHEDULER_OBSERVER to be enabled
     #endif
@@ -366,6 +382,14 @@
 #endif
 #endif
 
+/** __TBB_WIN8UI_SUPPORT enables support of new Windows*8 Store Apps and limits the possibility to load
+    shared libraries at run time to the application container only **/
+#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP
+    #define __TBB_WIN8UI_SUPPORT 1
+#else
+    #define __TBB_WIN8UI_SUPPORT 0
+#endif
+
 // Define preprocessor symbols used to determine architecture
 #if _WIN32||_WIN64
 #   if defined(_M_X64)||defined(__x86_64__)  // the latter for MinGW support
@@ -389,6 +413,7 @@
 #       define __TBB_generic_arch 1
 #   endif
 #endif
+
 /** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by
     the bugs in compilers, standard or OS specific libraries. They should be
     removed as soon as the corresponding bugs are fixed or the buggy OS/compiler
@@ -438,7 +463,7 @@
     #define __TBB_PROTECTED_NESTED_CLASS_BROKEN 1
 #endif
 
-#if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2)
+#if __MINGW32__ && __TBB_GCC_VERSION < 40200
     /** MinGW has a bug with stack alignment for routines invoked from MS RTLs.
         Since GCC 4.2, the bug can be worked around via a special attribute. **/
     #define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1
@@ -489,25 +514,18 @@
     #define __TBB_MAIN_THREAD_AFFINITY_BROKEN 1
 #endif
 
-/** __TBB_WIN8UI_SUPPORT enables support of New Windows*8 Store Apps and limit a possibility to load
-    shared libraries at run time only from application container **/
-#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP
-    #define __TBB_WIN8UI_SUPPORT 1
-#else
-    #define __TBB_WIN8UI_SUPPORT 0
-#endif
-
-#if !defined(__EXCEPTIONS) && __GNUC__==4 && (__GNUC_MINOR__==4 ||__GNUC_MINOR__==5 || (__INTEL_COMPILER==1300 && __TBB_GCC_VERSION>=40600 && __TBB_GCC_VERSION<=40700)) && defined(__GXX_EXPERIMENTAL_CXX0X__)
+#if __GXX_EXPERIMENTAL_CXX0X__ && !defined(__EXCEPTIONS) && \
+    __GNUC__==4 && (__GNUC_MINOR__==4 ||__GNUC_MINOR__==5 || (__INTEL_COMPILER==1300 && (__GNUC_MINOR__==6 ||__GNUC_MINOR__==7)))
 /* There is an issue for specific GCC toolchain when C++11 is enabled
    and exceptions are disabled:
-   exceprion_ptr.h/nested_exception.h are using throw unconditionally.
+   exception_ptr.h/nested_exception.h use throw unconditionally.
  */
     #define __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN 1
 #else
     #define __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN 0
 #endif
 
-#if __TBB_x86_32 && (__linux__ || __APPLE__ || _WIN32 || __sun) &&  ((defined(__INTEL_COMPILER) && (__INTEL_COMPILER <= 1300)) || (__GNUC__==3 && __GNUC_MINOR__==3 ) || defined(__SUNPRO_CC))
+#if __TBB_x86_32 && (__linux__ || __APPLE__ || _WIN32 || __sun) &&  ((defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1300) || (__GNUC__==3 && __GNUC_MINOR__==3 ) || defined(__SUNPRO_CC))
     // Some compilers for IA-32 fail to provide 8-byte alignment of objects on the stack,
     // even if the object specifies 8-byte alignment.  On such platforms, the IA-32 implementation
     // of 64 bit atomics (e.g. atomic<long long>) use different tactics depending upon
@@ -517,7 +535,7 @@
     #define __TBB_FORCE_64BIT_ALIGNMENT_BROKEN 0
 #endif
 
-#if (__TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && (__TBB_GCC_VERSION < 40700) && (!defined(__INTEL_COMPILER) && !defined (__clang__)))
+#if __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && __TBB_GCC_VERSION < 40700 && !defined(__INTEL_COMPILER) && !defined (__clang__)
     #define __TBB_ZERO_INIT_WITH_DEFAULTED_CTOR_BROKEN 1
 #endif
 /** End of __TBB_XXX_BROKEN macro section **/
diff --git a/include/tbb/tbb_machine.h b/include/tbb/tbb_machine.h
index bd8579e..995f301 100644
--- a/include/tbb/tbb_machine.h
+++ b/include/tbb/tbb_machine.h
@@ -916,6 +916,36 @@ inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
 #define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0)
 #endif
 
+// lock primitives with TSX
+#if ( __TBB_x86_32 || __TBB_x86_64 )  /* only on ia32/intel64 */
+inline void __TBB_TryLockByteElidedCancel() { __TBB_machine_try_lock_elided_cancel(); }
+
+inline bool __TBB_TryLockByteElided( __TBB_atomic_flag& flag ) {
+    bool res = __TBB_machine_try_lock_elided( &flag )!=0;
+    // to avoid the "lemming" effect, we need to abort the transaction
+    // if  __TBB_machine_try_lock_elided returns false (i.e., someone else
+    // has acquired the mutex non-speculatively).
+    if( !res ) __TBB_TryLockByteElidedCancel();
+    return res;
+}
+
+inline void __TBB_LockByteElided( __TBB_atomic_flag& flag )
+{
+    for(;;) {
+        tbb::internal::spin_wait_while_eq( flag, 1 );
+        if( __TBB_machine_try_lock_elided( &flag ) )
+            return;
+        // Another thread acquired the lock "for real".
+        // To avoid the "lemming" effect, we abort the transaction.
+        __TBB_TryLockByteElidedCancel();
+    }
+}
+
+inline void __TBB_UnlockByteElided( __TBB_atomic_flag& flag ) {
+    __TBB_machine_unlock_elided( &flag );
+}
+#endif
+
 #ifndef __TBB_ReverseByte
 inline unsigned char __TBB_ReverseByte(unsigned char src) {
     return tbb::internal::reverse<unsigned char>::byte_table[src];
diff --git a/include/tbb/tbb_stddef.h b/include/tbb/tbb_stddef.h
index 916f1ea..6697030 100644
--- a/include/tbb/tbb_stddef.h
+++ b/include/tbb/tbb_stddef.h
@@ -31,10 +31,10 @@
 
 // Marketing-driven product version
 #define TBB_VERSION_MAJOR 4
-#define TBB_VERSION_MINOR 1
+#define TBB_VERSION_MINOR 2
 
 // Engineering-focused interface version
-#define TBB_INTERFACE_VERSION 6105
+#define TBB_INTERFACE_VERSION 7000
 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
 
 // The oldest major interface version still supported
diff --git a/src/Makefile b/src/Makefile
index 3e792d5..5637040 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -44,7 +44,10 @@ tbbproxy: tbbproxy_release tbbproxy_debug
 
 rml: rml_release rml_debug
 
-test: tbbmalloc_test_release $(if $(use_proxy),tbbproxy_test_release) rml_test_release tbb_test_release tbbmalloc_test_debug $(if $(use_proxy),tbbproxy_test_debug) rml_test_debug tbb_test_debug
+test: tbbmalloc_test_release $(if $(use_proxy),tbbproxy_test_release) tbb_test_release tbbmalloc_test_debug $(if $(use_proxy),tbbproxy_test_debug) tbb_test_debug
+ifneq (android,$(target))
+test: rml_test_debug rml_test_release
+endif
 
 tbb_test_no_depends: tbbmalloc_test_release_no_depends $(if $(use_proxy),tbbproxy_test_release_no_depends) tbb_test_release_no_depends tbbmalloc_test_debug_no_depends $(if $(use_proxy),tbbproxy_test_debug_no_depends) tbb_test_debug_no_depends
 	@echo done
@@ -64,54 +67,58 @@ clean: clean_release clean_debug clean_examples
 
 .PHONY: full
 full:
-	$(MAKE) -s -i -r --no-print-directory -f Makefile tbb_root=. clean all
+	$(MAKE) -sir --no-print-directory -f Makefile tbb_root=.. clean all
 ifeq ($(tbb_os),windows)
-	$(MAKE) -s -i -r --no-print-directory -f Makefile tbb_root=. compiler=icl clean all native_examples
+	$(MAKE) -sir --no-print-directory -f Makefile tbb_root=.. compiler=icl clean all native_examples
 else
-	$(MAKE) -s -i -r --no-print-directory -f Makefile tbb_root=. compiler=icc clean all native_examples
+	$(MAKE) -sir --no-print-directory -f Makefile tbb_root=.. compiler=icc clean all native_examples
 endif
 ifeq ($(arch),intel64)
-	$(MAKE) -s -i -r --no-print-directory -f Makefile tbb_root=. arch=ia32 clean all
+	$(MAKE) -sir --no-print-directory -f Makefile tbb_root=.. arch=ia32 clean all
 endif
 # it doesn't test compiler=icc arch=ia32 on intel64 systems due to enviroment settings of icc
 
 native_examples: tbb tbbmalloc
-	$(MAKE) -C $(examples_root) -r -f Makefile tbb_root=.. compiler=$(native_compiler) tbb_build_prefix=$(tbb_build_prefix) debug test
-	$(MAKE) -C $(examples_root) -r -f Makefile tbb_root=.. compiler=$(native_compiler) tbb_build_prefix=$(tbb_build_prefix) clean release test
+	$(MAKE) -C $(examples_root) -r -f Makefile tbb_root=.. compiler=$(native_compiler) debug test
+	$(MAKE) -C $(examples_root) -r -f Makefile tbb_root=.. compiler=$(native_compiler) clean release test
 
 ../examples/% examples/%::
 	$(MAKE) -C $(examples_root) -r -f Makefile tbb_root=.. $(subst examples/,,$(subst ../,,$@))
 
 debug_%:: cfg?=debug
-debug_%:: run_cmd=$(debugger)
+debug_%:: export run_cmd=$(debugger)
 test_% stress_% time_% perf_%:: cfg?=release
+debug_malloc_% test_malloc_% debug_ScalableAlloc% test_ScalableAlloc%:: TESTFILE=tbbmalloc
+debug_rml_% test_rml_%:: TESTFILE=rml
+debug_runtime_load% test_runtime_load%:: TESTFILE=tbbproxy
+debug_% test_% stress_% time_% perf_%:: TESTFILE?=test
 debug_% test_% stress_% time_% perf_%::
-	$(MAKE) -C "$(work_dir)_$(cfg)"  -r -f $(tbb_root)/build/Makefile.test cfg=$(cfg) run_cmd="$(run_cmd)" tbb_root=$(tbb_root) $@
+	$(MAKE) -C "$(work_dir)_$(cfg)" -r -f $(tbb_root)/build/Makefile.$(TESTFILE) cfg=$(cfg) $@
 
 clean_%::
 ifeq ($(cfg),)
-	@$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.test cfg=release tbb_root=$(tbb_root) $@
-	@$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.test cfg=debug tbb_root=$(tbb_root) $@
+	@$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.test cfg=release $@
+	@$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.test cfg=debug $@
 else
-	@$(MAKE) -C "$(work_dir)_$(cfg)"  -r -f $(tbb_root)/build/Makefile.test cfg=$(cfg) tbb_root=$(tbb_root) $@
+	@$(MAKE) -C "$(work_dir)_$(cfg)"  -r -f $(tbb_root)/build/Makefile.test $@
 endif
 
 .PHONY: test_release test_debug test_release_no_depends test_debug_no_depends
 .PHONY: tbb_release tbb_debug tbb_test_release tbb_test_debug tbb_test_release_no_depends tbb_test_debug_no_depends
 # do not delete double-space after -C option
 tbb_release: mkdir_release
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbb cfg=release tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbb cfg=release
 
 tbb_debug: mkdir_debug
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbb cfg=debug tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbb cfg=debug
 
 tbb_test_release: $(call cross_cfg,tbb_release) $(if $(use_proxy),$(call cross_cfg,tbbproxy_release)) tbb_test_release_no_depends
 tbb_test_release_no_depends:$(call cross_cfg,mkdir_release)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)"  -r -f $(tbb_root)/build/Makefile.test cfg=release tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)"  -r -f $(tbb_root)/build/Makefile.test cfg=release
 
 tbb_test_debug: $(call cross_cfg,tbb_debug) $(if $(use_proxy),$(call cross_cfg,tbbproxy_debug)) tbb_test_debug_no_depends
 tbb_test_debug_no_depends:$(call cross_cfg,mkdir_debug)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"  -r -f $(tbb_root)/build/Makefile.test cfg=debug tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"  -r -f $(tbb_root)/build/Makefile.test cfg=debug
 # backward compatibility
 test_release: tbb_test_release
 test_debug: tbb_test_debug
@@ -123,68 +130,68 @@ test_debug_no_depends: tbb_test_debug_no_depends
 .PHONY: tbbmalloc_test tbbmalloc_test_release tbbmalloc_test_debug tbbmalloc_test_release_no_depends tbbmalloc_test_debug_no_depends
 
 tbbmalloc_release: mkdir_release
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc
 
 tbbmalloc_debug: mkdir_debug
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc
 
 tbbmalloc_dll_release: mkdir_release
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_dll tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_dll
 
 tbbmalloc_proxy_dll_release: mkdir_release
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release  malloc_proxy_dll tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release  malloc_proxy_dll
 
 tbbmalloc_dll_debug: mkdir_debug
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_dll tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_dll
 
 tbbmalloc_proxy_dll_debug: mkdir_debug
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_proxy_dll tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_proxy_dll
 
 tbbmalloc_test: tbbmalloc_test_release tbbmalloc_test_debug
 
 tbbmalloc_test_release: $(call cross_cfg,tbbmalloc_release) tbbmalloc_test_release_no_depends
 tbbmalloc_test_release_no_depends: $(call cross_cfg,mkdir_release)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_test_no_depends tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_test_no_depends
 
 tbbmalloc_test_debug: $(call cross_cfg,tbbmalloc_debug) tbbmalloc_test_debug_no_depends
 tbbmalloc_test_debug_no_depends: $(call cross_cfg,mkdir_debug)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_test_no_depends tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"  -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_test_no_depends
 
 .PHONY: tbbproxy_release tbbproxy_debug
 .PHONY: tbbproxy_test tbbproxy_test_release tbbproxy_test_debug tbbproxy_test_release_no_depends tbbproxy_test_debug_no_depends
 
 tbbproxy_release: mkdir_release tbb_release
-	$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=release tbbproxy tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=release tbbproxy
 
 tbbproxy_debug: mkdir_debug tbb_debug
-	$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=debug tbbproxy tbb_root=$(tbb_root)
+	$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=debug tbbproxy
 
 tbbproxy_test: tbbproxy_test_release tbbproxy_test_debug
 
 tbbproxy_test_release: $(call cross_cfg,tbb_release) $(call cross_cfg,tbbproxy_release) tbbproxy_test_release_no_depends
 tbbproxy_test_release_no_depends:$(call cross_cfg,mkdir_release)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=release tbbproxy_test tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=release tbbproxy_test
 
 tbbproxy_test_debug: $(call cross_cfg,tbb_debug) $(call cross_cfg,tbbproxy_debug) tbbproxy_test_debug_no_depends
 tbbproxy_test_debug_no_depends: $(call cross_cfg,mkdir_debug)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"   -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=debug   tbbproxy_test tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"   -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=debug tbbproxy_test
 
 .PHONY: rml_release rml_debug rml_test_release rml_test_debug
 .PHONY: rml_test_release_no_depends rml_test_debug_no_depends
 
 rml_release: mkdir_release
-	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.rml cfg=release tbb_root=$(tbb_root) rml
+	$(MAKE) -C "$(work_dir)_release"  -r -f $(tbb_root)/build/Makefile.rml cfg=release rml
 
 rml_debug: mkdir_debug
-	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.rml cfg=debug tbb_root=$(tbb_root) rml
+	$(MAKE) -C "$(work_dir)_debug"  -r -f $(tbb_root)/build/Makefile.rml cfg=debug rml
 
 rml_test_release: $(call cross_cfg,rml_release) rml_test_release_no_depends
 rml_test_release_no_depends: $(call cross_cfg,mkdir_release)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)"  -r -f $(tbb_root)/build/Makefile.rml cfg=release rml_test tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_release)"  -r -f $(tbb_root)/build/Makefile.rml cfg=release rml_test
 
 rml_test_debug: $(call cross_cfg,rml_debug) rml_test_debug_no_depends
 rml_test_debug_no_depends: $(call cross_cfg,mkdir_debug)
-	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"  -r -f $(tbb_root)/build/Makefile.rml cfg=debug rml_test tbb_root=$(tbb_root)
+	$(MAKE) -C "$(call cross_cfg,$(work_dir)_debug)"  -r -f $(tbb_root)/build/Makefile.rml cfg=debug rml_test
 
 .PHONY: examples_release examples_debug examples_release_no_depends examples_debug_no_depends
 
diff --git a/src/old/concurrent_vector_v2.h b/src/old/concurrent_vector_v2.h
index 6754ac8..f6f7c2a 100644
--- a/src/old/concurrent_vector_v2.h
+++ b/src/old/concurrent_vector_v2.h
@@ -373,7 +373,7 @@ public:
     //! Grow by "delta" elements.
     /** Returns old size. */
     size_type grow_by( size_type delta ) {
-        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array ) : my_early_size;
+        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array ) : my_early_size.load();
     }
 
     //! Grow array until it has at least n elements.
diff --git a/src/tbb/arena.cpp b/src/tbb/arena.cpp
index 469c653..0061f85 100644
--- a/src/tbb/arena.cpp
+++ b/src/tbb/arena.cpp
@@ -427,7 +427,7 @@ bool arena::is_out_of_work() {
                         if( work_absent ) {
 #if __TBB_TASK_PRIORITY
                             if ( top_priority > my_bottom_priority ) {
-                                if ( my_market->lower_arena_priority(*this, top_priority - 1, top_priority)
+                                if ( my_market->lower_arena_priority(*this, top_priority - 1, reload_epoch)
                                      && !my_task_stream[top_priority].empty() )
                                 {
                                     atomic_update( my_skipped_fifo_priority, top_priority, std::less<intptr_t>());
diff --git a/src/tbb/dynamic_link.cpp b/src/tbb/dynamic_link.cpp
index 32e9243..c226105 100644
--- a/src/tbb/dynamic_link.cpp
+++ b/src/tbb/dynamic_link.cpp
@@ -66,17 +66,60 @@
 
 #include "tbb/tbb_misc.h"
 
-#define __USE_TBB_ATOMICS ( !(__linux__&&__ia64__) || __TBB_BUILD )
+#define __USE_TBB_ATOMICS       ( !(__linux__&&__ia64__) || __TBB_BUILD )
+#define __USE_STATIC_DL_INIT    (!__ANDROID__)
 
 #if !__USE_TBB_ATOMICS
 #include <pthread.h>
 #endif
 
+/*
+dynamic_link is a common interface for searching for required symbols in an
+executable and dynamic libraries.
+
+dynamic_link provides certain guarantees:
+  1. Either all or none of the requested symbols are resolved. Moreover, if
+  symbols are not resolved, the dynamic_link_descriptor table is not modified;
+  2. All returned symbols have secured life time: this means that none of them
+  can be invalidated until dynamic_unlink is called;
+  3. Any loaded library is loaded only via the full path. The full path is that
+  from which the runtime itself was loaded. (This is done to avoid security
+  issues caused by loading libraries from insecure paths).
+
+dynamic_link searches for the requested symbols in three stages, stopping as
+soon as all of the symbols have been resolved.
+
+  1. Search the global scope:
+    a. On Windows: dynamic_link tries to obtain the handle of the requested
+    library and if it succeeds it resolves the symbols via that handle.
+    b. On Linux: dynamic_link tries to search for the symbols in the global
+    scope via the main program handle. If the symbols are present in the global
+    scope their life time is not guaranteed (since dynamic_link does not know
+    anything about the library from which they are exported). Therefore it
+    tries to "pin" the symbols by obtaining the library name and reopening it.
+    dlopen may fail to reopen the library in two cases:
+       i. The symbols are exported from the executable. Currently dynamic_link
+      cannot handle this situation, so it will not find these symbols in this
+      step.
+      ii. The necessary library has been unloaded and cannot be reloaded. It
+      seems there is nothing that can be done in this case. No symbols are
+      returned.
+
+  2. Dynamic load: an attempt is made to load the requested library via the
+  full path.
+    The full path used is that from which the runtime itself was loaded. If the
+    library can be loaded, then an attempt is made to resolve the requested
+    symbols in the newly loaded library.
+    If the symbols are not found the library is unloaded.
+
+  3. Weak symbols: if weak symbols are available they are returned.
+*/
+
 OPEN_INTERNAL_NAMESPACE
 
 #if __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED
 
-#if !defined(DYNAMIC_LINK_WARNING)
+#if !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT
     // Report runtime errors and continue.
     #define DYNAMIC_LINK_WARNING dynamic_link_warning
     static void dynamic_link_warning( dynamic_link_error_t code, ... ) {
@@ -102,7 +145,6 @@ OPEN_INTERNAL_NAMESPACE
             dynamic_link_descriptor const & desc = descriptors[k];
             pointer_to_handler addr = (pointer_to_handler)dlsym( module, desc.name );
             if ( !addr ) {
-                DYNAMIC_LINK_WARNING( dl_sym_not_found, desc.name, dlerror() );
                 return false;
             }
             h[k] = addr;
@@ -309,7 +351,9 @@ OPEN_INTERNAL_NAMESPACE
     static class _static_init_dl_data {
     public:
         _static_init_dl_data() {
+    #if __USE_STATIC_DL_INIT
             atomic_once( &init_dl_data, init_dl_data_state );
+    #endif
         }
     #if !__USE_TBB_ATOMICS
         ~_static_init_dl_data() {
@@ -384,7 +428,18 @@ OPEN_INTERNAL_NAMESPACE
         handles.free_handles();
     }
 
-    #if !_WIN32
+    #if _WIN32
+    static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], size_t required ) { // Windows variant of the global-scope search: returns a handle, or 0 on failure
+        dynamic_link_handle library_handle; // filled in by GetModuleHandleEx on success
+        if ( GetModuleHandleEx( 0, library, &library_handle ) ) { // succeeds only if 'library' is already loaded; with flags 0 it also increments the module reference count
+            if ( resolve_symbols( library_handle, descriptors, required ) )
+                return library_handle; // the reference taken above stays with the returned handle
+            else
+                FreeLibrary( library_handle ); // symbols were not resolved: release the reference taken above
+        }
+        return 0; // library is not loaded, or symbol resolution failed
+    }
+    #else /* _WIN32 */
     // It is supposed that all symbols are from the only one library
     static dynamic_link_handle pin_symbols( dynamic_link_descriptor desc, const dynamic_link_descriptor descriptors[], size_t required ) {
         // The library has been loaded by another module and contains at least one requested symbol.
@@ -416,29 +471,20 @@ OPEN_INTERNAL_NAMESPACE
         }
         return library_handle;
     }
-    #endif /* _WIN32 */
 
-    static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], size_t required ) {
-    #if _WIN32
-        dynamic_link_handle library_handle;
-        if ( GetModuleHandleEx( 0, library, &library_handle ) ) {
-            if ( resolve_symbols( library_handle, descriptors, required ) )
-                return library_handle;
-            else
-                FreeLibrary( library_handle );
-        }
-    #else /* _WIN32 */
+    static dynamic_link_handle global_symbols_link( const char*, const dynamic_link_descriptor descriptors[], size_t required ) {
     #if __TBB_WEAK_SYMBOLS_PRESENT
         if ( !dlopen ) return 0;
     #endif /* __TBB_WEAK_SYMBOLS_PRESENT */
         dynamic_link_handle library_handle = dlopen( NULL, RTLD_LAZY );
         // Check existence of only the first symbol, then use it to find the library and load all necessary symbols
-        dynamic_link_descriptor desc = descriptors[0];
+        pointer_to_handler handler;
+        dynamic_link_descriptor desc = { descriptors[0].name, &handler };
         if ( resolve_symbols( library_handle, &desc, 1 ) )
                 return pin_symbols( desc, descriptors, required );
-    #endif /* _WIN32 */
         return 0;
     }
+    #endif /* _WIN32 */
 
     static void save_library_handle( dynamic_link_handle src, dynamic_link_handle *dst ) {
         if ( dst )
diff --git a/src/tbb/dynamic_link.h b/src/tbb/dynamic_link.h
index 0b58516..56bfbd9 100644
--- a/src/tbb/dynamic_link.h
+++ b/src/tbb/dynamic_link.h
@@ -87,15 +87,20 @@ const int DYNAMIC_LINK_WEAK   = 0x04;
 const int DYNAMIC_LINK_ALL    = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMIC_LINK_WEAK;
 
 //! Fill in dynamically linked handlers.
-/** 'required' is the number of the initial entries in the array descriptors[]
+/** 'library' is the name of the requested library. It should not contain a full
+    path since dynamic_link adds the full path (from which the runtime itself
+    was loaded) to the library name.
+    'required' is the number of the initial entries in the array descriptors[]
     that have to be found in order for the call to succeed. If the library and
-    all the required handlers are found, then the corresponding handler pointers
-    are set, and the return value is true.  Otherwise the original array of
-    descriptors is left untouched and the return value is false. 'required' is
-    limited by 20 (exceeding of this value will result in failure to load the
-    symbols and the return value will be false).
-    'dl_allowed' flag allows dynamic library loading if the global symbols
-    searching mechanism has failed.
+    all the required handlers are found, then the corresponding handler
+    pointers are set, and the return value is true.  Otherwise the original
+    array of descriptors is left untouched and the return value is false.
+    'required' is limited to 20 (exceeding this value will result in failure
+    to load the symbols and the return value will be false).
+    'handle' is the handle of the library if it is loaded. Otherwise it is left
+    untouched.
+    'flags' is the set of DYNAMIC_LINK_* flags. Each of the DYNAMIC_LINK_* flags
+    allows its corresponding linking stage.
 **/
 bool dynamic_link( const char* library,
                    const dynamic_link_descriptor descriptors[],
diff --git a/src/tbb/governor.cpp b/src/tbb/governor.cpp
index bb95f73..0f841f9 100644
--- a/src/tbb/governor.cpp
+++ b/src/tbb/governor.cpp
@@ -94,7 +94,7 @@ void governor::release_resources () {
 #endif
     int status = theTLS.destroy();
     if( status )
-        handle_perror(status, "TBB failed to destroy task scheduler TLS");
+        runtime_warning("failed to destroy task scheduler TLS: %s", strerror(status));
     dynamic_unlink_all();
 }
 
diff --git a/src/tbbmalloc/win32-tbbmalloc-export.def b/src/tbb/ia32-masm/itsx.asm
similarity index 63%
copy from src/tbbmalloc/win32-tbbmalloc-export.def
copy to src/tbb/ia32-masm/itsx.asm
index 43c97d7..5069c09 100644
--- a/src/tbbmalloc/win32-tbbmalloc-export.def
+++ b/src/tbb/ia32-masm/itsx.asm
@@ -24,29 +24,25 @@
 ; invalidate any other reasons why the executable file might be covered by
 ; the GNU General Public License.
 
-EXPORTS
-
-; frontend.cpp
-scalable_calloc
-scalable_free
-scalable_malloc
-scalable_realloc
-scalable_posix_memalign
-scalable_aligned_malloc
-scalable_aligned_realloc
-scalable_aligned_free
-safer_scalable_free
-safer_scalable_realloc
-scalable_msize
-scalable_allocation_mode
-safer_scalable_msize
-safer_scalable_aligned_realloc
-?pool_create@rml@@YAPAVMemoryPool@1@HPBUMemPoolPolicy@1@@Z
-?pool_create_v1@rml@@YA?AW4MemPoolError@1@HPBUMemPoolPolicy@1@PAPAVMemoryPool@1@@Z
-?pool_destroy@rml@@YA_NPAVMemoryPool@1@@Z
-?pool_malloc@rml@@YAPAXPAVMemoryPool@1@I@Z
-?pool_free@rml@@YA_NPAVMemoryPool@1@PAX@Z
-?pool_reset@rml@@YA_NPAVMemoryPool@1@@Z
-?pool_realloc@rml@@YAPAXPAVMemoryPool@1@PAXI@Z
-?pool_aligned_realloc@rml@@YAPAXPAVMemoryPool@1@PAXII@Z
-?pool_aligned_malloc@rml@@YAPAXPAVMemoryPool@1@II@Z
+.686
+.model flat,c
+.code
+        ALIGN 4
+        PUBLIC c __TBB_machine_try_lock_elided
+__TBB_machine_try_lock_elided:          ; try to take a one-byte lock; eax = previous byte XOR 1 (1 if it was 0, 0 if it was 1)
+        mov ecx, 4[esp]                 ; ecx = first stack argument, used below as the address of the lock byte
+        xor eax, eax                    ; clear eax so the bits above al are zero in the result
+        mov al, 1                       ; value to write into the lock byte
+        BYTE 0F2H                       ; raw prefix byte for the next instruction; 0F2H is the XACQUIRE (lock elision) prefix encoding
+        xchg al, byte ptr [ecx]         ; swap 1 into the lock byte (xchg with memory is atomic); al = previous value
+        xor  al, 1                      ; invert the low bit: previous 0 -> 1, previous 1 -> 0
+        ret
+.code
+        ALIGN 4
+        PUBLIC c __TBB_machine_unlock_elided
+__TBB_machine_unlock_elided:            ; release a one-byte lock by storing 0 into it; no return value is set
+        mov ecx, 4[esp]                 ; ecx = first stack argument, used below as the address of the lock byte
+        BYTE 0F3H                       ; raw prefix byte for the next instruction; 0F3H is the XRELEASE (lock elision) prefix encoding
+        mov byte ptr [ecx], 0           ; write 0 to the lock byte
+        ret
+end
diff --git a/src/tbbmalloc/win32-tbbmalloc-export.def b/src/tbb/intel64-masm/itsx.asm
similarity index 63%
copy from src/tbbmalloc/win32-tbbmalloc-export.def
copy to src/tbb/intel64-masm/itsx.asm
index 43c97d7..2acec64 100644
--- a/src/tbbmalloc/win32-tbbmalloc-export.def
+++ b/src/tbb/intel64-masm/itsx.asm
@@ -24,29 +24,21 @@
 ; invalidate any other reasons why the executable file might be covered by
 ; the GNU General Public License.
 
-EXPORTS
-
-; frontend.cpp
-scalable_calloc
-scalable_free
-scalable_malloc
-scalable_realloc
-scalable_posix_memalign
-scalable_aligned_malloc
-scalable_aligned_realloc
-scalable_aligned_free
-safer_scalable_free
-safer_scalable_realloc
-scalable_msize
-scalable_allocation_mode
-safer_scalable_msize
-safer_scalable_aligned_realloc
-?pool_create@rml@@YAPAVMemoryPool@1@HPBUMemPoolPolicy@1@@Z
-?pool_create_v1@rml@@YA?AW4MemPoolError@1@HPBUMemPoolPolicy@1@PAPAVMemoryPool@1@@Z
-?pool_destroy@rml@@YA_NPAVMemoryPool@1@@Z
-?pool_malloc@rml@@YAPAXPAVMemoryPool@1@I@Z
-?pool_free@rml@@YA_NPAVMemoryPool@1@PAX@Z
-?pool_reset@rml@@YA_NPAVMemoryPool@1@@Z
-?pool_realloc@rml@@YAPAXPAVMemoryPool@1@PAXI@Z
-?pool_aligned_realloc@rml@@YAPAXPAVMemoryPool@1@PAXII@Z
-?pool_aligned_malloc@rml@@YAPAXPAVMemoryPool@1@II@Z
+.code
+        ALIGN 8
+        PUBLIC __TBB_machine_try_lock_elided
+__TBB_machine_try_lock_elided:          ; try to take a one-byte lock; rax = previous byte XOR 1 (1 if it was 0, 0 if it was 1)
+        xor  rax, rax                   ; clear rax so the bits above al are zero in the result
+        mov  al, 1                      ; value to write into the lock byte
+        BYTE 0F2H                       ; raw prefix byte for the next instruction; 0F2H is the XACQUIRE (lock elision) prefix encoding
+        xchg al, byte ptr [rcx]         ; rcx holds the lock address (first integer argument in the Microsoft x64 convention); al = previous value
+        xor  al, 1                      ; invert the low bit: previous 0 -> 1, previous 1 -> 0
+        ret
+.code
+        ALIGN 8
+        PUBLIC __TBB_machine_unlock_elided
+__TBB_machine_unlock_elided:            ; release a one-byte lock by storing 0 into it; no return value is set
+        BYTE 0F3H                       ; raw prefix byte for the next instruction; 0F3H is the XRELEASE (lock elision) prefix encoding
+        mov  byte ptr [rcx], 0          ; rcx holds the lock address (first integer argument in the Microsoft x64 convention)
+        ret
+end
diff --git a/src/tbb/market.cpp b/src/tbb/market.cpp
index b88fcb6..92409a9 100644
--- a/src/tbb/market.cpp
+++ b/src/tbb/market.cpp
@@ -534,9 +534,9 @@ void market::update_arena_top_priority ( arena& a, intptr_t new_priority ) {
     __TBB_ASSERT( prev_level.workers_requested >= 0 && new_level.workers_requested >= 0, NULL );
 }
 
-bool market::lower_arena_priority ( arena& a, intptr_t new_priority, intptr_t old_priority ) {
+bool market::lower_arena_priority ( arena& a, intptr_t new_priority, uintptr_t old_reload_epoch ) {
     arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex);
-    if ( a.my_top_priority != old_priority ) {
+    if ( a.my_reload_epoch != old_reload_epoch ) {
         assert_market_valid();
         return false;
     }
diff --git a/src/tbb/market.h b/src/tbb/market.h
index 6f51715..d0a040d 100644
--- a/src/tbb/market.h
+++ b/src/tbb/market.h
@@ -314,7 +314,7 @@ public:
 #if __TBB_TASK_PRIORITY
     //! Lowers arena's priority is not higher than newPriority 
     /** Returns true if arena priority was actually elevated. **/ 
-    bool lower_arena_priority ( arena& a, intptr_t new_priority, intptr_t old_priority );
+    bool lower_arena_priority ( arena& a, intptr_t new_priority, uintptr_t old_reload_epoch );
 
     //! Makes sure arena's priority is not lower than newPriority 
     /** Returns true if arena priority was elevated. Also updates arena's bottom
diff --git a/src/tbb/scheduler.h b/src/tbb/scheduler.h
index 89f08e8..623416f 100644
--- a/src/tbb/scheduler.h
+++ b/src/tbb/scheduler.h
@@ -470,7 +470,7 @@ public:
     //! Finds all contexts registered by this scheduler affected by the state change
     //! and propagates the new state to them.
     template <typename T>
-    void propagate_task_group_state ( T task_group_context::*mptr_state, T new_state );
+    void propagate_task_group_state ( T task_group_context::*mptr_state, task_group_context& src, T new_state );
 #endif /* __TBB_TASK_GROUP_CONTEXT */
 
 #if _WIN32||_WIN64
diff --git a/src/tbb/task.cpp b/src/tbb/task.cpp
index 85794fc..58f8e5f 100644
--- a/src/tbb/task.cpp
+++ b/src/tbb/task.cpp
@@ -226,7 +226,10 @@ void interface5::internal::task_base::destroy( task& victim ) {
     task* parent = victim.parent();
     victim.~task();
     if( parent ) {
-        __TBB_ASSERT( parent->state()==task::allocated, "attempt to destroy child of running or corrupted parent?" );
+        __TBB_ASSERT( parent->state()!=task::freed && parent->state()!=task::ready,
+                      "attempt to destroy child of running or corrupted parent?" );
+        // 'reexecute' and 'executing' are also signs of a race condition, since most tasks
+        // set their ref_count upon entry but "es_ref_count_active" should detect this
         parent->internal_decrement_ref_count();
         // Even if the last reference to *parent is removed, it should not be spawned (documented behavior).
     }
diff --git a/src/tbb/task_group_context.cpp b/src/tbb/task_group_context.cpp
index 4026243..edd983a 100644
--- a/src/tbb/task_group_context.cpp
+++ b/src/tbb/task_group_context.cpp
@@ -225,6 +225,7 @@ void task_group_context::init () {
 void task_group_context::register_with ( generic_scheduler *local_sched ) {
     __TBB_ASSERT( local_sched, NULL );
     my_owner = local_sched;
+    // state propagation logic assumes new contexts are bound to head of the list
     my_node.my_prev = &local_sched->my_context_list_head;
     // Notify threads that may be concurrently destroying contexts registered
     // in this scheduler's list that local list update is underway.
@@ -260,7 +261,7 @@ void task_group_context::bind_to ( generic_scheduler *local_sched ) {
 
     // Condition below prevents unnecessary thrashing parent context's cache line
     if ( !(my_parent->my_state & may_have_children) )
-        my_parent->my_state |= may_have_children;
+        my_parent->my_state |= may_have_children; // full fence is below
     if ( my_parent->my_parent ) {
         // Even if this context were made accessible for state change propagation
         // (by placing __TBB_store_with_release(s->my_context_list_head.my_next, &my_node)
@@ -310,21 +311,36 @@ void task_group_context::bind_to ( generic_scheduler *local_sched ) {
 
 #if __TBB_TASK_GROUP_CONTEXT
 template <typename T>
-void task_group_context::propagate_state_from_ancestors ( T task_group_context::*mptr_state, T new_state ) {
-    task_group_context *ancestor = my_parent;
-    while ( ancestor && ancestor->*mptr_state != new_state )
-        ancestor = ancestor->my_parent;
-    if ( ancestor ) {
-        task_group_context *ctx = this;
-        do {
-            ctx->*mptr_state = new_state;
-            ctx = ctx->my_parent;
-        } while ( ctx != ancestor );
+void task_group_context::propagate_task_group_state ( T task_group_context::*mptr_state, task_group_context& src, T new_state ) {
+    if (this->*mptr_state == new_state) {
+        // Nothing to do, whether descending from "src" or not, so no need to scan.
+        // Hopefully this happens often thanks to earlier invocations,
+        // and it is more relevant for set_priority() than for cancel_group_execution()
+        // because the latter will also stop at an already cancelled ancestor.
+        // This optimisation is enabled by LIFO order in the context lists:
+        // - new contexts are bound to the beginning of lists;
+        // - descendants are newer than ancestors;
+        // - earlier invocations are therefore likely to "paint" long chains.
+    }
+    else if (this == &src) {
+        // This clause is disjunct from the traversal below, which skips src entirely.
+        // Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again).
+        // Such interference is probably not frequent enough to aim for optimisation by writing new_state again (to make the other thread back down).
+        // Letting the other thread prevail may also be fairer.
+    }
+    else {
+        for ( task_group_context *ancestor = my_parent; ancestor != NULL; ancestor = ancestor->my_parent ) {
+            if ( ancestor == &src ) {
+                for ( task_group_context *ctx = this; ctx != ancestor; ctx = ctx->my_parent )
+                    ctx->*mptr_state = new_state;
+                break;
+            }
+        }
     }
 }
 
 template <typename T>
-void generic_scheduler::propagate_task_group_state ( T task_group_context::*mptr_state, T new_state ) {
+void generic_scheduler::propagate_task_group_state ( T task_group_context::*mptr_state, task_group_context& src, T new_state ) {
     spin_mutex::scoped_lock lock(my_context_list_mutex);
     // Acquire fence is necessary to ensure that the subsequent node->my_next load 
     // returned the correct value in case it was just inserted in another thread.
@@ -333,7 +349,7 @@ void generic_scheduler::propagate_task_group_state ( T task_group_context::*mptr
     while ( node != &my_context_list_head ) {
         task_group_context &ctx = __TBB_get_object_ref(task_group_context, my_node, node);
         if ( ctx.*mptr_state != new_state )
-            ctx.propagate_state_from_ancestors( mptr_state, new_state );
+            ctx.propagate_task_group_state( mptr_state, src, new_state );
         node = node->my_next;
         __TBB_ASSERT( is_alive(ctx.my_version_and_traits), "Local context list contains destroyed object" );
     }
@@ -348,12 +364,11 @@ bool market::propagate_task_group_state ( T task_group_context::*mptr_state, tas
         return true;
     // The whole propagation algorithm is under the lock in order to ensure correctness 
     // in case of concurrent state changes at the different levels of the context tree.
-    // See the note 3 at the bottom of scheduler.cpp
+    // See comment at the bottom of scheduler.cpp
     context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
     if ( src.*mptr_state != new_state )
-        // Another thread has concurrently changed the state. Back off.
+        // Another thread has concurrently changed the state. Back down.
         return false;
-    src.*mptr_state = new_state;
     // Advance global state propagation epoch
     __TBB_FetchAndAddWrelease(&the_context_state_propagation_epoch, 1);
     // Propagate to all workers and masters and sync up their local epochs with the global one
@@ -362,7 +377,7 @@ bool market::propagate_task_group_state ( T task_group_context::*mptr_state, tas
         generic_scheduler *s = my_workers[i];
         // If the worker is only about to be registered, skip it.
         if ( s )
-            s->propagate_task_group_state( mptr_state, new_state );
+            s->propagate_task_group_state( mptr_state, src, new_state );
     }
     // Propagate to all master threads (under my_arenas_list_mutex lock)
     ForEachArena(a) {
@@ -374,7 +389,7 @@ bool market::propagate_task_group_state ( T task_group_context::*mptr_state, tas
             __TBB_ASSERT( slot.my_scheduler == LockedMaster, NULL );
             // The whole propagation sequence is locked, thus no contention is expected
             __TBB_ASSERT( s != LockedMaster, NULL );
-            s->propagate_task_group_state( mptr_state, new_state );
+            s->propagate_task_group_state( mptr_state, src, new_state );
             __TBB_store_with_release( slot.my_scheduler, s );
         }
     } EndForEach();
@@ -390,7 +405,9 @@ bool arena::propagate_task_group_state ( T task_group_context::*mptr_state, task
 bool task_group_context::cancel_group_execution () {
     __TBB_ASSERT ( my_cancellation_requested == 0 || my_cancellation_requested == 1, "Invalid cancellation state");
     if ( my_cancellation_requested || as_atomic(my_cancellation_requested).compare_and_swap(1, 0) ) {
-        // This task group has already been canceled
+        // This task group and any descendants have already been canceled.
+        // (A newly added descendant would inherit its parent's my_cancellation_requested,
+        // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.)
         return false;
     }
     governor::local_scheduler()->my_arena->propagate_task_group_state( &task_group_context::my_cancellation_requested, *this, (uintptr_t)1 );
@@ -427,7 +444,7 @@ void task_group_context::register_pending_exception () {
 void task_group_context::set_priority ( priority_t prio ) {
     __TBB_ASSERT( prio == priority_low || prio == priority_normal || prio == priority_high, "Invalid priority level value" );
     intptr_t p = normalize_priority(prio);
-    if ( my_priority == p )
+    if ( my_priority == p && !(my_state & task_group_context::may_have_children))
         return;
     my_priority = p;
     internal::generic_scheduler* s = governor::local_scheduler_if_initialized();
diff --git a/src/tbb/win32-tbb-export.lst b/src/tbb/win32-tbb-export.lst
index 0e865c8..c1813ac 100644
--- a/src/tbb/win32-tbb-export.lst
+++ b/src/tbb/win32-tbb-export.lst
@@ -42,6 +42,8 @@ __TBB_SYMBOL( __TBB_machine_fetchstore8 )
 __TBB_SYMBOL( __TBB_machine_store8 )
 __TBB_SYMBOL( __TBB_machine_load8 )
 __TBB_SYMBOL( __TBB_machine_trylockbyte )
+__TBB_SYMBOL( __TBB_machine_try_lock_elided )
+__TBB_SYMBOL( __TBB_machine_unlock_elided )
 
 // cache_aligned_allocator.cpp
 __TBB_SYMBOL( ?NFS_Allocate@internal@tbb@@YAPAXIIPAX@Z )
diff --git a/src/tbb/win64-tbb-export.lst b/src/tbb/win64-tbb-export.lst
index bddf731..597b516 100644
--- a/src/tbb/win64-tbb-export.lst
+++ b/src/tbb/win64-tbb-export.lst
@@ -37,6 +37,8 @@ __TBB_SYMBOL( __TBB_machine_cmpswp2 )
 __TBB_SYMBOL( __TBB_machine_fetchadd2 )
 __TBB_SYMBOL( __TBB_machine_fetchstore2 )
 __TBB_SYMBOL( __TBB_machine_pause )
+__TBB_SYMBOL( __TBB_machine_try_lock_elided )
+__TBB_SYMBOL( __TBB_machine_unlock_elided )
 
 // cache_aligned_allocator.cpp
 __TBB_SYMBOL( ?NFS_Allocate@internal@tbb@@YAPEAX_K0PEAX@Z )
diff --git a/src/tbbmalloc/Customize.h b/src/tbbmalloc/Customize.h
index c27604f..135c56c 100644
--- a/src/tbbmalloc/Customize.h
+++ b/src/tbbmalloc/Customize.h
@@ -86,6 +86,7 @@ public:
     friend class scoped_lock;
 };
 
+// TODO: use signed/unsigned in atomics more consistently
 inline intptr_t AtomicIncrement( volatile intptr_t& counter ) {
     return __TBB_FetchAndAddW( &counter, 1 )+1;
 }
@@ -98,6 +99,18 @@ inline intptr_t AtomicCompareExchange( volatile intptr_t& location, intptr_t new
     return __TBB_CompareAndSwapW( &location, new_value, comparand );
 }
 
+inline uintptr_t AtomicFetchStore(volatile void* location, uintptr_t value) { // thin wrapper forwarding to __TBB_FetchAndStoreW
+    return __TBB_FetchAndStoreW(location, value); // NOTE(review): presumably yields the previous contents of *location - confirm
+}
+
+inline void AtomicOr(volatile void *operand, uintptr_t addend) { // despite its name, 'addend' is the mask handed to the OR primitive
+    __TBB_AtomicOR(operand, addend); // thin wrapper forwarding to __TBB_AtomicOR; nothing is returned
+}
+
+inline void AtomicAnd(volatile void *operand, uintptr_t addend) { // despite its name, 'addend' is the mask handed to the AND primitive
+    __TBB_AtomicAND(operand, addend); // thin wrapper forwarding to __TBB_AtomicAND; nothing is returned
+}
+
 inline intptr_t FencedLoad( const volatile intptr_t &location ) {
     return __TBB_load_with_acquire(location);
 }
@@ -130,13 +143,7 @@ static inline bool isPowerOfTwoMultiple(uintptr_t arg, uintptr_t divisor) {
     return arg && tbb::internal::is_power_of_two_factor(arg,divisor);
 }
 
-inline void AtomicOr(volatile void *operand, uintptr_t addend) {
-    __TBB_AtomicOR(operand, addend);
-}
-
-inline void AtomicAnd(volatile void *operand, uintptr_t addend) {
-    __TBB_AtomicAND(operand, addend);
-}
+#define MALLOC_STATIC_ASSERT(condition,msg) __TBB_STATIC_ASSERT(condition,msg)
 
 #define USE_DEFAULT_MEMORY_MAPPING 1
 
diff --git a/src/tbbmalloc/frontend.cpp b/src/tbbmalloc/frontend.cpp
index bbd5fe2..306cfd5 100644
--- a/src/tbbmalloc/frontend.cpp
+++ b/src/tbbmalloc/frontend.cpp
@@ -38,11 +38,13 @@
 #if USE_PTHREAD
     #define TlsSetValue_func pthread_setspecific
     #define TlsGetValue_func pthread_getspecific
+    #define GetMyTID() pthread_self()
     #include <sched.h>
     inline void do_yield() {sched_yield();}
     extern "C" { static void mallocThreadShutdownNotification(void*); }
 
 #elif USE_WINTHREAD
+    #define GetMyTID() GetCurrentThreadId()
 #if __TBB_WIN8UI_SUPPORT
 #include<thread>
     #define TlsSetValue_func FlsSetValue
@@ -117,13 +119,26 @@ public:
     void reset();
 };
 
+#if USE_INTERNAL_TID
 class ThreadId {
     static tls_key_t Tid_key;
     static intptr_t ThreadIdCount;
 
     unsigned int id;
-public:
 
+    static ThreadId get() { // id of the calling thread, assigned lazily on the first call from that thread
+        ThreadId result;
+        result.id = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key)); // the id is kept in the Tid_key thread-local slot as a pointer-sized value
+        if( !result.id ) {
+            RecursiveMallocCallProtector scoped; // NOTE(review): presumably guards against re-entering the allocator while the slot is being set - confirm
+            // Thread-local value is zero -> first call from this thread,
+            // need to initialize with next ID value (IDs start from 1)
+            result.id = AtomicIncrement(ThreadIdCount); // returned new value!
+            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result.id) ); // remember the id for later calls from this thread
+        }
+        return result;
+    }
+public:
     static void init() {
 #if USE_WINTHREAD
         Tid_key = TlsAlloc();
@@ -149,36 +164,53 @@ public:
             Tid_key = 0;
         }
     }
-    static ThreadId get() {
-        ThreadId result;
-        result.id = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
-        if( !result.id ) {
-            RecursiveMallocCallProtector scoped;
-            // Thread-local value is zero -> first call from this thread,
-            // need to initialize with next ID value (IDs start from 1)
-            result.id = AtomicIncrement(ThreadIdCount); // returned new value!
-            TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result.id) );
-        }
-        return result;
-    }
 
-    bool defined() const { return id; }
-    void undef() { id = 0; }
-    void invalid() { id = (unsigned int)-1; }
-    bool own() const { return id == ThreadId::get().id; }
+    bool isCurrentThreadId() const { return id == ThreadId::get().id; } // true when the stored id equals the calling thread's id; get() may assign that id on first use
+    void saveCurrentThreadId() { id = ThreadId::get().id; } // store the calling thread's id in this object
+
+#if COLLECT_STATISTICS
+    static unsigned getMaxThreadId() { return ThreadIdCount; } // current value of the counter that get() increments to hand out ids (ids start from 1)
 
-    friend bool operator==(const ThreadId &id1, const ThreadId &id2);
     friend unsigned int getThreadId();
+    friend int STAT_increment(ThreadId tid, int bin, int ctr);
+#endif
 };
 
 tls_key_t ThreadId::Tid_key;
 intptr_t ThreadId::ThreadIdCount;
 
-bool operator==(const ThreadId &id1, const ThreadId &id2) {
-    return id1.id == id2.id;
-}
-
+#if COLLECT_STATISTICS
 unsigned int getThreadId() { return ThreadId::get().id; }
+#endif
+
+#else // USE_INTERNAL_TID
+
+class ThreadId { // variant used when USE_INTERNAL_TID is off: wraps the native thread id obtained via GetMyTID()
+#if USE_PTHREAD
+    pthread_t tid;
+#else
+    DWORD     tid;
+#endif
+public:
+    ThreadId() : tid(GetMyTID()) {} // starts out holding the id of the constructing thread
+    void saveCurrentThreadId() { tid = GetMyTID(); } // overwrite with the id of the calling thread
+#if USE_PTHREAD
+    bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid); } // pthread_t values are compared with pthread_equal rather than ==
+#else
+    bool isCurrentThreadId() const { return GetCurrentThreadId() == tid; } // Windows thread ids are plain DWORDs
+#endif
+    static void init() {} // no-op: this variant has no TLS key or counter to set up
+    static void destroy() {} // no-op: nothing was allocated by init()
+};
+
+#endif // USE_INTERNAL_TID
+
+#if COLLECT_STATISTICS
+int STAT_increment(ThreadId tid, int bin, int ctr) // overload taking a ThreadId; forwards its numeric id to the global STAT_increment
+{
+    return ::STAT_increment(tid.id, bin, ctr); // NOTE(review): 'id' exists only in the USE_INTERNAL_TID variant of ThreadId - confirm COLLECT_STATISTICS is never enabled without it
+}
+#endif
 
 /*********** Code to provide thread ID and a thread-local void pointer **********/
 
@@ -258,8 +290,8 @@ private:
 class OrphanedBlocks {
     LifoList bins[numBlockBinLimit];
 public:
-    Block *get(Bin* bin, unsigned int size);
-    void put(Bin* bin, Block *block);
+    Block *get(TLSData *tls, unsigned int size);
+    void put(Bin *bin, Block *block);
     void reset();
 };
 
@@ -289,15 +321,12 @@ public:
     inline TLSData *getTLS(bool create);
     void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(NULL); }
 
-    Bin *getAllocationBin(TLSData* tls, size_t size);
     Block *getEmptyBlock(size_t size);
     void returnEmptyBlock(Block *block, bool poolTheBlock);
 
     // get/put large object to/from local large object cache
     void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
     void putToLLOCache(TLSData *tls, void *object);
-
-    inline void allocatorCalledHook(TLSData *tls);
 };
 
 static char defaultMemPool_space[sizeof(MemoryPool)];
@@ -316,21 +345,33 @@ protected:
     Block       *nextPrivatizable;
 };
 
+template<size_t padd>
+struct Padding { // filler member type: occupies 'padd' size_t-sized words
+    size_t       __padding[padd];
+};
+
+template<>
+struct Padding<0> { // no padding requested: empty struct, since a zero-length array is not valid standard C++
+};
+
 class LocalBlockFields : public GlobalBlockFields {
 protected:
-    size_t       __pad_local_fields[(blockHeaderAlignment -
-                                     sizeof(GlobalBlockFields))/sizeof(size_t)];
+    Padding<(blockHeaderAlignment -
+             sizeof(GlobalBlockFields))/sizeof(size_t)> pad_local;
 
     Block       *next;
     Block       *previous;        /* Use double linked list to speed up removal */
-    uint16_t     objectSize;
-    ThreadId     owner;
     FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
     FreeObject  *freeList;
+    /* Pointer to local data for the owner thread. Used for fast finding tls
+       when releasing object from a block that current thread owned.
+       NULL for orphaned blocks. */
+    TLSData     *tlsPtr;
+    ThreadId     ownerTid;
     BackRefIdx   backRefIdx;
-    unsigned int allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
+    uint16_t     allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
+    uint16_t     objectSize;
     bool         isFull;
-    bool         orphaned;
 
     friend void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size);
     friend class FreeBlockPool;
@@ -340,8 +381,8 @@ protected:
 };
 
 class Block : public LocalBlockFields {
-    size_t       __pad_public_fields[(2*blockHeaderAlignment -
-                                      sizeof(LocalBlockFields))/sizeof(size_t)];
+    Padding<(2*blockHeaderAlignment -
+             sizeof(LocalBlockFields))/sizeof(size_t)> pad_public;
 public:
     bool empty() const { return allocatedCount==0 && publicFreeList==NULL; }
     inline FreeObject* allocate();
@@ -353,15 +394,15 @@ public:
     void makeEmpty();
     void privatizePublicFreeList();
     void restoreBumpPtr();
-    void privatizeOrphaned(Bin *bin);
-    void shareOrphaned(const Bin *bin);
+    void privatizeOrphaned(TLSData *tls, unsigned index);
+    void shareOrphaned(const Bin *bin, unsigned index);
     unsigned int getSize() const {
         MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
                       "Invalid object size");
         return objectSize;
     }
     const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
-    bool ownBlock() const { return !orphaned && owner.own(); }
+    inline TLSData *ownBlock() const;
     bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
     inline FreeObject *findObjectToFree(void *object) const;
     bool checkFreePrecond(void *object) const {
@@ -375,7 +416,7 @@ public:
         return false;
     }
     const BackRefIdx *getBackRef() const { return &backRefIdx; }
-    void initEmptyBlock(Bin* tlsBin, size_t size);
+    void initEmptyBlock(TLSData *tls, size_t size);
 
 protected:
     void cleanBlockHeader();
@@ -386,6 +427,15 @@ private:
     inline FreeObject *allocateFromBumpPtr();
     inline FreeObject *findAllocatedObject(const void *address) const;
     inline bool isProperlyPlaced(const void *object) const;
+    inline bool isOrphaned() const { return !tlsPtr; } // a block is orphaned exactly when its tlsPtr is NULL
+    inline void markOwned(TLSData *tls) { // bind this block to the calling thread and to the given TLS data
+        ownerTid.saveCurrentThreadId(); // owner becomes the thread that calls markOwned
+        tlsPtr = tls; // isOrphaned() tests this pointer, so a non-NULL 'tls' makes the block owned
+    }
+    inline void markOrphaned() { // detach this block from its owner's TLS data
+        MALLOC_ASSERT(tlsPtr, ASSERT_TEXT); // the block must currently be owned
+        tlsPtr = NULL; // ownerTid is left unchanged; only tlsPtr marks the orphaned state
+    }
 
     friend class Bin;
     friend class TLSData;
@@ -394,6 +444,10 @@ private:
 
 const float Block::emptyEnoughRatio = 1.0 / 4.0;
 
+MALLOC_STATIC_ASSERT(sizeof(Block) <= 2*estimatedCacheLineSize,
+    "The class Block does not fit into 2 cache lines on this platform. "
+    "Defining USE_INTERNAL_TID may help to fix it.");
+
 class Bin {
     Block      *activeBlk;
     Block      *mailbox;
@@ -483,11 +537,9 @@ const size_t scalableMallocPoolGranularity = 4*1024;  // page size, for mmap use
  */
 class FreeBlockPool {
     Block      *head;
-    Block      *tail;
     int         size;
     Backend    *backend;
     bool        lastAccessMiss;
-    void insertBlock(Block *block);
 public:
     static const int POOL_HIGH_MARK = 32;
     static const int POOL_LOW_MARK  = 8;
@@ -504,50 +556,53 @@ public:
     FreeBlockPool(Backend *bknd) : backend(bknd) {}
     ResOfGet getBlock();
     void returnBlock(Block *block);
-    bool releaseAllBlocks();
+    bool externalCleanup(); // can be called by another thread
 };
 
 template<int LOW_MARK, int HIGH_MARK>
-class LocalLOC {
+class LocalLOCImpl {
     static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
-
     // TODO: can single-linked list be faster here?
     LargeMemoryBlock *head,
-                     *tail;
-    intptr_t          lastSeenOSCallsCnt,
-                      lastUsedOSCallsCnt;
+                     *tail; // needed when releasing blocks on overflow
     size_t            totalSize;
     int               numOfBlocks;
 public:
     bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
     LargeMemoryBlock *get(size_t size);
-    bool clean(ExtMemoryPool *extMemPool);
-    void allocatorCalledHook(ExtMemoryPool *extMemPool);
+    bool externalCleanup(ExtMemoryPool *extMemPool);
 #if __TBB_MALLOC_WHITEBOX_TEST
-    LocalLOC() : head(NULL), tail(NULL), lastSeenOSCallsCnt(0),
-                 lastUsedOSCallsCnt(0), totalSize(0),
-                 numOfBlocks(0) {}
+    LocalLOCImpl() : head(NULL), tail(NULL), totalSize(0), numOfBlocks(0) {}
     static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
+    static const int LOC_HIGH_MARK = HIGH_MARK;
 #else
     // no ctor, object must be created in zero-initialized memory
 #endif
 };
 
-class TLSData {
+typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters
+
+class TLSData : public TLSRemote {
 #if USE_PTHREAD
     MemoryPool   *memPool;
 #endif
 public:
     Bin           bin[numBlockBinLimit];
     FreeBlockPool freeSlabBlocks;
-    LocalLOC<8,32> lloc;
+    LocalLOC      lloc;
 #if USE_PTHREAD
     TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {}
     MemoryPool *getMemPool() const { return memPool; }
 #else
     TLSData(MemoryPool * /*memPool*/, Backend *bknd) : freeSlabBlocks(bknd) {}
 #endif
+    Bin* getAllocationBin(size_t size);
     void release(MemoryPool *mPool);
+    bool externalCleanup(ExtMemoryPool *mPool) {
+        // both cleanups must be called (hence non-short-circuit '|'); their order is not important
+        return lloc.externalCleanup(mPool) | freeSlabBlocks.externalCleanup();
+    }
+    bool cleanUnusedActiveBlocks(Backend *backend, bool userPool);
 };
 
 TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
@@ -563,33 +618,71 @@ TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
         tls->bin[i].verifyInitState();
 #endif
     setThreadMallocTLS(tls);
+    memPool->extMemPool.allLocalCaches.registerThread(tls);
     return tls;
 }
 
-bool ExtMemoryPool::releaseTLCaches()
+bool TLSData::cleanUnusedActiveBlocks(Backend *backend, bool userPool)
 {
     bool released = false;
+    // active blocks may be unused; return such blocks to the backend
+    for (uint32_t i=0; i<numBlockBinLimit; i++)
+        if (bin[i].activeBlockUnused()) {
+            Block *block = bin[i].getActiveBlock();
+            bin[i].outofTLSBin(block);
+            // slab blocks in user's pools do not have valid backRefIdx
+            if (!userPool)
+                removeBackRef(*(block->getBackRefIdx()));
+            backend->putSlabBlock(block);
+
+            released = true;
+        }
+    return released;
+}
+
+bool ExtMemoryPool::releaseAllLocalCaches()
+{
+    bool released = allLocalCaches.cleanup(this);
+
+    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
+        // unused active blocks are released only for the current thread for now
+        released |= tlsData->cleanUnusedActiveBlocks(&backend, userPool());
 
-    if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS()) {
-        released = tlsData->freeSlabBlocks.releaseAllBlocks();
-        released |= tlsData->lloc.clean(this);
-
-        // active blocks can be not used, so return them to backend
-        for (uint32_t i=0; i<numBlockBinLimit; i++)
-            if (tlsData->bin[i].activeBlockUnused()) {
-                Block *block = tlsData->bin[i].getActiveBlock();
-                tlsData->bin[i].outofTLSBin(block);
-                // slab blocks in user's pools do not have valid backRefIdx
-                if (!userPool())
-                    removeBackRef(*(block->getBackRefIdx()));
-                backend.putSlabBlock(block);
-
-                released = true;
-            }
-    }
     return released;
 }
 
+void AllLocalCaches::registerThread(TLSRemote *tls)
+{
+    tls->prev = NULL;
+    MallocMutex::scoped_lock lock(listLock);
+    tls->next = head;
+    if (head)
+        head->prev = tls;
+    head = tls;
+}
+
+void AllLocalCaches::unregisterThread(TLSRemote *tls)
+{
+    MallocMutex::scoped_lock lock(listLock);
+    if (head == tls)
+        head = tls->next;
+    if (tls->next)
+        tls->next->prev = tls->prev;
+    if (tls->prev)
+        tls->prev->next = tls->next;
+}
+
+bool AllLocalCaches::cleanup(ExtMemoryPool *extPool)
+{
+    bool total = false;
+    {
+        MallocMutex::scoped_lock lock(listLock);
+
+        for (TLSRemote *curr=head; curr; curr=curr->next)
+            total |= static_cast<TLSData*>(curr)->externalCleanup(extPool);
+    }
+    return total;
+}
 
 #if MALLOC_CHECK_RECURSION
 MallocMutex RecursiveMallocCallProtector::rmc_mutex;
@@ -863,9 +956,9 @@ TLSData* MemoryPool::getTLS(bool create)
 /*
  * Return the bin for the given size.
  */
-Bin* MemoryPool::getAllocationBin(TLSData* tls, size_t size)
+Bin* TLSData::getAllocationBin(size_t size)
 {
-    return tls->bin + getIndex(size);
+    return bin + getIndex(size);
 }
 
 /* Return an empty uninitialized block in a non-blocking fashion. */
@@ -909,6 +1002,7 @@ Block *MemoryPool::getEmptyBlock(size_t size)
                 setBackRef(backRefIdx[i], b);
                 b->backRefIdx = backRefIdx[i];
             }
+            b->tlsPtr = tls;
             // all but first one go to per-thread pool
             if (i > 0) {
                 MALLOC_ASSERT(tls, ASSERT_TEXT);
@@ -917,7 +1011,7 @@ Block *MemoryPool::getEmptyBlock(size_t size)
         }
     }
     if (result) {
-        result->initEmptyBlock(tls? tls->bin : NULL, size);
+        result->initEmptyBlock(tls, size);
         STAT_increment(result->owner, getIndex(result->objectSize), allocBlockNew);
     }
     return result;
@@ -1020,7 +1114,7 @@ void Bin::verifyTLSBin (size_t size) const
     uint32_t objSize = getObjectSize(size);
 
     if (activeBlk) {
-        MALLOC_ASSERT( activeBlk->owner.own(), ASSERT_TEXT );
+        MALLOC_ASSERT( activeBlk->ownerTid.isCurrentThreadId(), ASSERT_TEXT );
         MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
 #if MALLOC_DEBUG>1
         for (Block* temp = activeBlk->next; temp; temp=temp->next) {
@@ -1055,7 +1149,7 @@ void Bin::pushTLSBin(Block* block)
        because the function is applied to partially filled blocks as well */
     unsigned int size = block->objectSize;
 
-    MALLOC_ASSERT( block->owner == ThreadId::get(), ASSERT_TEXT );
+    MALLOC_ASSERT( block->ownerTid.isCurrentThreadId(), ASSERT_TEXT );
     MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
     MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT );
     MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT );
@@ -1083,7 +1177,7 @@ void Bin::outofTLSBin(Block* block)
 {
     unsigned int size = block->objectSize;
 
-    MALLOC_ASSERT( block->owner == ThreadId::get(), ASSERT_TEXT );
+    MALLOC_ASSERT( block->ownerTid.isCurrentThreadId(), ASSERT_TEXT );
     MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
 
     MALLOC_ASSERT( this, ASSERT_TEXT );
@@ -1171,10 +1265,10 @@ void Block::freeOwnObject(MemoryPool *memPool, TLSData *tls, void *object)
     allocatedCount--;
     MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
 #if COLLECT_STATISTICS
-    if (getActiveBlock(memPool->getAllocationBin(block->objectSize)) != block)
-        STAT_increment(myTid, getIndex(block->objectSize), freeToInactiveBlock);
+    if (tls->getAllocationBin(objectSize)->getActiveBlock() != this)
+        STAT_increment(owner, getIndex(objectSize), freeToInactiveBlock);
     else
-        STAT_increment(myTid, getIndex(block->objectSize), freeToActiveBlock);
+        STAT_increment(owner, getIndex(objectSize), freeToActiveBlock);
 #endif
     if (allocatedCount==0 && publicFreeList==NULL) {
         // The bump pointer is about to be restored for the block,
@@ -1182,8 +1276,7 @@ void Block::freeOwnObject(MemoryPool *memPool, TLSData *tls, void *object)
 
         // if the last object of a slab is freed, the slab cannot be marked full
         MALLOC_ASSERT(!isFull, ASSERT_TEXT);
-        memPool->getAllocationBin(tls, objectSize)->
-            processLessUsedBlock(memPool, this);
+        tls->getAllocationBin(objectSize)->processLessUsedBlock(memPool, this);
     } else {
         FreeObject *objectToFree = findObjectToFree(object);
         objectToFree->next = freeList;
@@ -1191,7 +1284,7 @@ void Block::freeOwnObject(MemoryPool *memPool, TLSData *tls, void *object)
 
         if (isFull) {
             if (emptyEnoughToUse())
-                memPool->getAllocationBin(tls, objectSize)->moveBlockToBinFront(this);
+                tls->getAllocationBin(objectSize)->moveBlockToBinFront(this);
         }
     }
 }
@@ -1229,13 +1322,10 @@ void Block::freePublicObject (FreeObject *objectToFree)
         // So the executing thread is now the only one that can change nextPrivatizable
         if( !isNotForUse(nextPrivatizable) ) {
             MALLOC_ASSERT( nextPrivatizable!=NULL, ASSERT_TEXT );
-            MALLOC_ASSERT( owner.defined(), ASSERT_TEXT );
             Bin* theBin = (Bin*) nextPrivatizable;
             MallocMutex::scoped_lock scoped_cs(theBin->mailLock);
             nextPrivatizable = theBin->mailbox;
             theBin->mailbox = this;
-        } else {
-            MALLOC_ASSERT( !owner.defined(), ASSERT_TEXT );
         }
     }
     STAT_increment(ThreadId::get(), ThreadCommonCounters, freeToOtherThread);
@@ -1246,7 +1336,7 @@ void Block::privatizePublicFreeList()
 {
     FreeObject *temp, *localPublicFreeList;
 
-    MALLOC_ASSERT( owner.own(), ASSERT_TEXT );
+    MALLOC_ASSERT( ownerTid.isCurrentThreadId(), ASSERT_TEXT );
 #if FREELIST_NONBLOCKING
     temp = publicFreeList;
     do {
@@ -1284,16 +1374,16 @@ void Block::privatizePublicFreeList()
     }
 }
 
-void Block::privatizeOrphaned(Bin* bin)
+void Block::privatizeOrphaned(TLSData *tls, unsigned index)
 {
+    Bin* bin = tls->bin + index;
+    STAT_increment(owner, index, allocBlockPublic);
     next = NULL;
     previous = NULL;
     MALLOC_ASSERT( publicFreeList!=NULL, ASSERT_TEXT );
     /* There is not a race here since no other thread owns this block */
-    MALLOC_ASSERT( !owner.defined(), ASSERT_TEXT );
-    owner = ThreadId::get();
-    MALLOC_ASSERT(orphaned, ASSERT_TEXT);
-    orphaned = false;
+    MALLOC_ASSERT(isOrphaned(), ASSERT_TEXT);
+    markOwned(tls);
     // It is safe to change nextPrivatizable, as publicFreeList is not null
     MALLOC_ASSERT( isNotForUse(nextPrivatizable), ASSERT_TEXT );
     nextPrivatizable = (Block*)bin;
@@ -1307,12 +1397,11 @@ void Block::privatizeOrphaned(Bin* bin)
     MALLOC_ASSERT( !isNotForUse(publicFreeList), ASSERT_TEXT );
 }
 
-void Block::shareOrphaned(const Bin *bin)
+void Block::shareOrphaned(const Bin *bin, unsigned index)
 {
     MALLOC_ASSERT( bin, ASSERT_TEXT );
     STAT_increment(owner, index, freeBlockPublic);
-    MALLOC_ASSERT(!orphaned, ASSERT_TEXT);
-    orphaned = true;
+    markOrphaned();
     // need to set publicFreeList to non-zero, so other threads
     // will not change nextPrivatizable and it can be zeroed.
     if ((intptr_t)nextPrivatizable==(intptr_t)bin) {
@@ -1345,7 +1434,6 @@ void Block::shareOrphaned(const Bin *bin)
     MALLOC_ASSERT( publicFreeList!=NULL, ASSERT_TEXT );
     // now it is safe to change our data
     previous = NULL;
-    owner.undef();
     // it is caller responsibility to ensure that the list of blocks
     // formed by nextPrivatizable pointers is kept consistent if required.
     // if only called from thread shutdown code, it does not matter.
@@ -1359,12 +1447,12 @@ void Block::cleanBlockHeader()
     freeList = NULL;
     allocatedCount = 0;
     isFull = 0;
-    orphaned = false;
+    tlsPtr = NULL;
 
     publicFreeList = NULL;
 }
 
-void Block::initEmptyBlock(Bin* tlsBin, size_t size)
+void Block::initEmptyBlock(TLSData *tls, size_t size)
 {
     // Having getIndex and getObjectSize called next to each other
     // allows better compiler optimization as they basically share the code.
@@ -1373,35 +1461,33 @@ void Block::initEmptyBlock(Bin* tlsBin, size_t size)
 
     cleanBlockHeader();
     objectSize = objSz;
-    owner = ThreadId::get();
+    markOwned(tls);
     // bump pointer should be prepared for first allocation - thus mode it down to objectSize
     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
 
     // each block should have the address where the head of the list of "privatizable" blocks is kept
     // the only exception is a block for boot strap which is initialized when TLS is yet NULL
-    nextPrivatizable = tlsBin? (Block*)(tlsBin + index) : NULL;
+    nextPrivatizable = tls? (Block*)(tls->bin + index) : NULL;
     TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %d, objectSize is %d, bumpPtr is %p\n",
              this, owner, objectSize, bumpPtr ));
 }
 
-Block *OrphanedBlocks::get(Bin* bin, unsigned int size)
+Block *OrphanedBlocks::get(TLSData *tls, unsigned int size)
 {
-    Block *result;
-    MALLOC_ASSERT( bin, ASSERT_TEXT );
+    // TODO: try to use index from getAllocationBin
     unsigned int index = getIndex(size);
-    result = bins[index].pop();
-    if (result) {
+    Block *block = bins[index].pop();
+    if (block) {
         MALLOC_ITT_SYNC_ACQUIRED(bins+index);
-        result->privatizeOrphaned(bin);
-        STAT_increment(result->owner, index, allocBlockPublic);
+        block->privatizeOrphaned(tls, index);
     }
-    return result;
+    return block;
 }
 
 void OrphanedBlocks::put(Bin* bin, Block *block)
 {
     unsigned int index = getIndex(block->getSize());
-    block->shareOrphaned(bin);
+    block->shareOrphaned(bin, index);
     MALLOC_ITT_SYNC_RELEASING(bins+index);
     bins[index].push(block);
 }
@@ -1412,25 +1498,15 @@ void OrphanedBlocks::reset()
         new (bins+i) LifoList();
 }
 
-void FreeBlockPool::insertBlock(Block *block)
-{
-    size++;
-    block->next = head;
-    head = block;
-    if (!tail)
-        tail = block;
-}
-
 FreeBlockPool::ResOfGet FreeBlockPool::getBlock()
 {
-    Block *b = head;
+    Block *b = (Block*)AtomicFetchStore(&head, 0);
 
-    if (head) {
+    if (b) {
         size--;
-        head = head->next;
-        if (!head)
-            tail = NULL;
+        Block *newHead = b->next;
         lastAccessMiss = false;
+        FencedStore((intptr_t&)head, (intptr_t)newHead);
     } else
         lastAccessMiss = true;
 
@@ -1440,42 +1516,46 @@ FreeBlockPool::ResOfGet FreeBlockPool::getBlock()
 void FreeBlockPool::returnBlock(Block *block)
 {
     MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT );
-    if (size == POOL_HIGH_MARK) {
-        // release cold blocks and add hot one
-        Block *headToFree = head,
-              *helper;
+    Block *localHead = (Block*)AtomicFetchStore(&head, 0);
+
+    if (!localHead)
+        size = 0; // head was stolen by externalCleanup, correct size accordingly
+    else if (size == POOL_HIGH_MARK) {
+        // release cold blocks and add hot one,
+        // so keep POOL_LOW_MARK-1 blocks and add new block to head
+        Block *headToFree = localHead, *helper;
         for (int i=0; i<POOL_LOW_MARK-2; i++)
             headToFree = headToFree->next;
-        tail = headToFree;
+        Block *last = headToFree;
         headToFree = headToFree->next;
-        tail->next = NULL;
+        last->next = NULL;
         size = POOL_LOW_MARK-1;
-        // slab blocks from user pools not have valid backreference
         for (Block *currBl = headToFree; currBl; currBl = helper) {
             helper = currBl->next;
+            // slab blocks in user's pools do not have valid backRefIdx
             if (!backend->inUserPool())
                 removeBackRef(currBl->backRefIdx);
             backend->putSlabBlock(currBl);
         }
     }
-    insertBlock(block);
+    size++;
+    block->next = localHead;
+    FencedStore((intptr_t&)head, (intptr_t)block);
 }
 
-bool FreeBlockPool::releaseAllBlocks()
+bool FreeBlockPool::externalCleanup()
 {
     Block *helper;
-    bool nonEmpty = size;
+    bool nonEmpty = false;
 
-    for (Block *currBl = head; currBl; currBl=helper) {
+    for (Block *currBl=(Block*)AtomicFetchStore(&head, 0); currBl; currBl=helper) {
         helper = currBl->next;
-        // slab blocks in user's pools not have valid backRefIdx
+        // slab blocks in user's pools do not have valid backRefIdx
         if (!backend->inUserPool())
             removeBackRef(currBl->backRefIdx);
         backend->putSlabBlock(currBl);
+        nonEmpty = true;
     }
-    head = tail = NULL;
-    size = 0;
-
     return nonEmpty;
 }
 
@@ -1485,14 +1565,14 @@ void Block::makeEmpty()
     // it is caller's responsibility to ensure no data is lost before calling this
     MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT );
     MALLOC_ASSERT( publicFreeList==NULL, ASSERT_TEXT );
-    STAT_increment(owner, getIndex(objectSize), freeBlockBack);
+    if (!isStartupAllocObject())
+        STAT_increment(owner, getIndex(objectSize), freeBlockBack);
 
     cleanBlockHeader();
 
     nextPrivatizable = NULL;
 
     objectSize = 0;
-    owner.invalid();
     // for an empty block, bump pointer should point right after the end of the block
     bumpPtr = (FreeObject *)((uintptr_t)this + slabSize);
 }
@@ -1500,7 +1580,7 @@ void Block::makeEmpty()
 inline void Bin::setActiveBlock (Block *block)
 {
 //    MALLOC_ASSERT( bin, ASSERT_TEXT );
-    MALLOC_ASSERT( block->owner.own(), ASSERT_TEXT );
+    MALLOC_ASSERT( block->ownBlock(), ASSERT_TEXT );
     // it is the caller responsibility to keep bin consistence (i.e. ensure this block is in the bin list)
     activeBlk = block;
 }
@@ -1516,6 +1596,12 @@ inline Block* Bin::setPreviousBlockActive()
     return temp;
 }
 
+inline TLSData *Block::ownBlock() const {
+    if (!tlsPtr || !ownerTid.isCurrentThreadId()) return NULL;
+
+    return tlsPtr;
+}
+
 FreeObject *Block::findObjectToFree(void *object) const
 {
     FreeObject *objectToFree;
@@ -1543,8 +1629,8 @@ FreeObject *Block::findObjectToFree(void *object) const
 
 void TLSData::release(MemoryPool *mPool)
 {
-    lloc.clean(&mPool->extMemPool);
-    freeSlabBlocks.releaseAllBlocks();
+    mPool->extMemPool.allLocalCaches.unregisterThread(this);
+    externalCleanup(&mPool->extMemPool);
 
     for (unsigned index = 0; index < numBlockBins; index++) {
         Block *activeBlk = bin[index].getActiveBlock();
@@ -1877,7 +1963,7 @@ FreeObject *Block::allocateFromBumpPtr()
 
 inline FreeObject* Block::allocate()
 {
-    MALLOC_ASSERT( owner.own(), ASSERT_TEXT );
+    MALLOC_ASSERT( ownBlock(), ASSERT_TEXT );
 
     /* for better cache locality, first looking in the free list. */
     if ( FreeObject *result = allocateFromFreeList() ) {
@@ -1917,20 +2003,27 @@ void Bin::processLessUsedBlock(MemoryPool *memPool, Block *block)
 }
 
 template<int LOW_MARK, int HIGH_MARK>
-bool LocalLOC<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool)
+bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool)
 {
     const size_t size = object->unalignedSize;
+    // do not spoil the cache with a too-large object, as that can cause its total cleanup
     if (size > MAX_TOTAL_SIZE)
         return false;
+    LargeMemoryBlock *localHead = (LargeMemoryBlock*)AtomicFetchStore(&head, 0);
 
-    totalSize += size;
     object->prev = NULL;
-    object->next = head;
-    if (head) head->prev = object;
-    head = object;
-    if (!tail) tail = object;
+    object->next = localHead;
+    if (localHead)
+        localHead->prev = object;
+    else {
+        // these may be left stale after externalCleanup() steals the list, so reset them
+        totalSize = 0;
+        numOfBlocks = 0;
+        tail = object;
+    }
+    localHead = object;
+    totalSize += size;
     numOfBlocks++;
-    MALLOC_ASSERT(!tail->next, ASSERT_TEXT);
     // must meet both size and number of cached objects constrains
     if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) {
         // scanning from tail until meet conditions
@@ -1944,59 +2037,50 @@ bool LocalLOC<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool
 
         extMemPool->freeLargeObjectList(headToRelease);
     }
-    lastUsedOSCallsCnt = lastSeenOSCallsCnt;
+
+    FencedStore((intptr_t&)head, (intptr_t)localHead);
     return true;
 }
 
 template<int LOW_MARK, int HIGH_MARK>
-LargeMemoryBlock *LocalLOC<LOW_MARK, HIGH_MARK>::get(size_t size)
+LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size)
 {
-    if (lastUsedOSCallsCnt != lastSeenOSCallsCnt)
-        lastUsedOSCallsCnt = lastSeenOSCallsCnt;
+    LargeMemoryBlock *localHead, *res=NULL;
 
-    for (LargeMemoryBlock *curr = head; curr; curr=curr->next) {
+    if (!(localHead = (LargeMemoryBlock*)AtomicFetchStore(&head, 0))) {
+        // do not restore totalSize, numOfBlocks and tail at this point,
+        // as they are used only in put(), where they must be restored
+        return NULL;
+    }
+
+    for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) {
         if (curr->unalignedSize == size) {
-            LargeMemoryBlock *res = curr;
+            res = curr;
             if (curr->next)
                 curr->next->prev = curr->prev;
             else
                 tail = curr->prev;
-            if (curr->prev)
+            if (curr != localHead)
                 curr->prev->next = curr->next;
             else
-                head = curr->next;
+                localHead = curr->next;
             totalSize -= size;
             numOfBlocks--;
-            return res;
+            break;
         }
     }
-    return NULL;
-}
-
-template<int LOW_MARK, int HIGH_MARK>
-bool LocalLOC<LOW_MARK, HIGH_MARK>::clean(ExtMemoryPool *extMemPool)
-{
-    bool released = numOfBlocks;
-
-    if (numOfBlocks)
-        extMemPool->freeLargeObjectList(head);
-    head = tail = NULL;
-    numOfBlocks = 0;
-    totalSize = 0;
-    return released;
+    FencedStore((intptr_t&)head, (intptr_t)localHead);
+    return res;
 }
 
 template<int LOW_MARK, int HIGH_MARK>
-void LocalLOC<LOW_MARK, HIGH_MARK>::allocatorCalledHook(ExtMemoryPool *extMemPool)
+bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool)
 {
-    intptr_t currCnt = extMemPool->backend.askMemFromOSCounter.get();
-
-    // clean the cache iff there was OS memory request since last hook call
-    // and the cache was not touched since previous OS memory request
-    if (currCnt != lastSeenOSCallsCnt && lastUsedOSCallsCnt != lastSeenOSCallsCnt
-        && head)
-        clean(extMemPool);
-    lastSeenOSCallsCnt = currCnt;
+    if (LargeMemoryBlock *localHead = (LargeMemoryBlock*)AtomicFetchStore(&head, 0)) {
+        extMemPool->freeLargeObjectList(localHead);
+        return true;
+    }
+    return false;
 }
 
 void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment)
@@ -2039,13 +2123,6 @@ void MemoryPool::putToLLOCache(TLSData *tls, void *object)
         extMemPool.freeLargeObject(header->memoryBlock);
 }
 
-// called on each allocator call
-void MemoryPool::allocatorCalledHook(TLSData *tls)
-{
-    // TODO: clean freeSlabBlocks as well
-    tls->lloc.allocatorCalledHook(&extMemPool);
-}
-
 #if USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND)
 
 /* Decrease race interval between dynamic library unloading and pthread key
@@ -2126,7 +2203,6 @@ static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment)
         if (!isMallocInitialized())
             doInitialization();
         TLSData *tls = memPool->getTLS(/*create=*/true);
-        memPool->allocatorCalledHook(tls);
         // take into account only alignment that are higher then natural
         result =
             memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment?
@@ -2237,7 +2313,7 @@ static inline bool isRecognized (void* ptr)
     return isLargeObject(ptr) || isSmallObject(ptr);
 }
 
-static inline void freeSmallObject(MemoryPool *memPool, TLSData *tls, void *object)
+static inline void freeSmallObject(MemoryPool *memPool, void *object)
 {
     /* mask low bits to get the block */
     Block *block = (Block *)alignDown(object, slabSize);
@@ -2250,7 +2326,7 @@ static inline void freeSmallObject(MemoryPool *memPool, TLSData *tls, void *obje
         return;
     }
 #endif
-    if (block->ownBlock())
+    if (TLSData *tls = block->ownBlock())
         block->freeOwnObject(memPool, tls, object);
     else { /* Slower path to add to the shared list, the allocatedCount is updated by the owner thread in malloc. */
         FreeObject *objectToFree = block->findObjectToFree(object);
@@ -2268,7 +2344,6 @@ static void *internalPoolMalloc(MemoryPool* memPool, size_t size)
     if (!size) size = sizeof(size_t);
 
     TLSData *tls = memPool->getTLS(/*create=*/true);
-    memPool->allocatorCalledHook(tls);
     /*
      * Use Large Object Allocation
      */
@@ -2279,7 +2354,7 @@ static void *internalPoolMalloc(MemoryPool* memPool, size_t size)
      * Get an element in thread-local array corresponding to the given size;
      * It keeps ptr to the active block for allocations of this size
      */
-    bin = memPool->getAllocationBin(tls, size);
+    bin = tls->getAllocationBin(size);
     if ( !bin ) return NULL;
 
     /* Get a block to try to allocate in. */
@@ -2309,13 +2384,13 @@ static void *internalPoolMalloc(MemoryPool* memPool, size_t size)
     /*
      * no suitable own blocks, try to get a partial block that some other thread has discarded.
      */
-    mallocBlock = memPool->orphanedBlocks.get(bin, size);
+    mallocBlock = memPool->orphanedBlocks.get(tls, size);
     while (mallocBlock) {
         bin->pushTLSBin(mallocBlock);
         bin->setActiveBlock(mallocBlock); // TODO: move under the below condition?
         if( FreeObject *result = mallocBlock->allocate() )
             return result;
-        mallocBlock = memPool->orphanedBlocks.get(bin, size);
+        mallocBlock = memPool->orphanedBlocks.get(tls, size);
     }
 
     /*
@@ -2347,13 +2422,11 @@ static bool internalPoolFree(MemoryPool *memPool, void *object)
     MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT);
     MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object),
                   "Invalid pointer in pool_free detected.");
-    TLSData *tls = memPool->getTLS(/*create=*/false);
-    if (tls) memPool->allocatorCalledHook(tls);
 
     if (isLargeObject(object))
-        memPool->putToLLOCache(tls, object);
+        memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object);
     else
-        freeSmallObject(memPool, tls, object);
+        freeSmallObject(memPool, object);
     return true;
 }
 
@@ -2423,7 +2496,7 @@ rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
 {
     if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION
          // empty pFree allowed only for fixed pools
-         || !(policy->fixedPool || policy->pFree) ) {
+         || !(policy->fixedPool || policy->pFree)) {
         *pool = NULL;
         return INVALID_POLICY;
     }
@@ -2552,7 +2625,7 @@ void mallocThreadShutdownNotification(void* arg)
             memPool->processThreadShutdown(tls);
 #else
     if (!shutdownSync.threadDtorStart()) return;
-    // The routine is called for each memPool, just need to get memPool from TLSData.
+    // The routine is called for each memPool, gets memPool from TLSData.
     TLSData *tls = (TLSData*)arg;
     tls->getMemPool()->processThreadShutdown(tls);
     shutdownSync.threadDtorDone();
@@ -2598,7 +2671,7 @@ extern "C" void __TBB_mallocProcessShutdownNotification()
 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED
 
 #if COLLECT_STATISTICS
-    ThreadId nThreads = ThreadIdCount;
+    unsigned nThreads = ThreadId::getMaxThreadId();
     for( int i=1; i<=nThreads && i<MAX_THREADS; ++i )
         STAT_print(i);
 #endif
@@ -2628,14 +2701,10 @@ extern "C" void safer_scalable_free (void *object, void (*original_free)(void*))
     // and it can be unaccessable
     if (isLargeObject(object)) {
         TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
-        if (tls) defaultMemPool->allocatorCalledHook(tls);
 
         defaultMemPool->putToLLOCache(tls, object);
     } else if (isSmallObject(object)) {
-        TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
-        if (tls) defaultMemPool->allocatorCalledHook(tls);
-
-        freeSmallObject(defaultMemPool, tls, object);
+        freeSmallObject(defaultMemPool, object);
     } else if (original_free)
         original_free(object);
 }
@@ -2874,19 +2943,37 @@ extern "C" size_t safer_scalable_msize (void *object, size_t (*original_msize)(v
 
 extern "C" int scalable_allocation_mode(int param, intptr_t value)
 {
+    if (param == USE_HUGE_PAGES) {
 #if __linux__
-    if (param == USE_HUGE_PAGES)
         switch (value) {
         case 0:
         case 1:
             hugePages.setMode(value);
-            return 0;
+            return TBBMALLOC_OK;
         default:
-            return 1;
+            return TBBMALLOC_INVALID_PARAM;
         }
 #else
-    suppress_unused_warning(param);
-    suppress_unused_warning(value);
+        suppress_unused_warning(value);
+        return TBBMALLOC_NO_EFFECT;
 #endif
-    return 1;
+    }
+    return TBBMALLOC_INVALID_PARAM;
+}
+
+extern "C" int scalable_allocation_command(int cmd, void *param)
+{
+    if (param)
+        return TBBMALLOC_INVALID_PARAM;
+    switch(cmd) {
+    case TBBMALLOC_CLEAN_THREAD_BUFFERS:
+        if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false))
+            return tls->externalCleanup(&defaultMemPool->extMemPool)?
+                TBBMALLOC_OK : TBBMALLOC_NO_EFFECT;
+        return TBBMALLOC_NO_EFFECT;
+    case TBBMALLOC_CLEAN_ALL_BUFFERS:
+        return defaultMemPool->extMemPool.hardCachesCleanup()?
+            TBBMALLOC_OK : TBBMALLOC_NO_EFFECT;
+    }
+    return TBBMALLOC_INVALID_PARAM;
 }
diff --git a/src/tbbmalloc/large_objects.cpp b/src/tbbmalloc/large_objects.cpp
index 83798b0..4446d54 100644
--- a/src/tbbmalloc/large_objects.cpp
+++ b/src/tbbmalloc/large_objects.cpp
@@ -69,7 +69,7 @@ LargeMemoryBlock *LargeObjectCacheImpl<Props>::CacheBin::
 
         for (curr=tail, i=0; curr; curr=curr->prev, i++) {
             curr->age = currTime+i;
-            STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeBlk);
+            STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
         }
 
         if (!lastCleanedAge) {
@@ -133,8 +133,8 @@ LargeMemoryBlock *LargeObjectCacheImpl<Props>::CacheBin::
                 oldest = 0;
             }
             // use moving average with current hit interval
-            intptr_t hitR = currTime - result->age;
-            lastHit = lastHit? (lastHit + hitR)/2 : hitR;
+            intptr_t hitRange = currTime - result->age;
+            meanHitRange = meanHitRange? (meanHitRange + hitRange)/2 : hitRange;
 
             cachedSize -= size;
         } else {
@@ -268,7 +268,7 @@ size_t LargeObjectCacheImpl<Props>::CacheBin::reportStat(int num, FILE *f)
 #if __TBB_MALLOC_LOCACHE_STAT
     if (first)
         printf("%d(%lu): total %lu KB thr %ld lastCln %lu lastHit %lu oldest %lu\n",
-               num, num*CacheStep+MinSize,
+               num, num*Props::CacheStep+Props::MinSize,
                cachedSize/1024, ageThreshold, lastCleanedAge, lastHit, oldest);
 #else
     suppress_unused_warning(num);
@@ -405,7 +405,7 @@ LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(uintptr_t currTime, size_t si
         bitMask.set(idx, true);
     if (lmb) {
         MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
-        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeBlk);
+        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
     }
     return lmb;
 }
@@ -423,16 +423,15 @@ template<typename Props>
 void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
 {
     size_t cachedSize = 0;
-    for (int i=0; i<numLargeBlockBins; i++)
+    for (int i=0; i<numBins; i++)
         cachedSize += bin[i].reportStat(i, f);
-    fprintf(f, "total LOC size %lu MB\nnow %lu\n", cachedSize/1024/1024,
-            loCacheStat.age);
+    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
 }
 
 void LargeObjectCache::reportStat(FILE *f)
 {
-    largeObjs.reportStat(f);
-    hugeObjs.reportStat(f);
+    largeCache.reportStat(f);
+    hugeCache.reportStat(f);
 }
 #endif
 
@@ -571,7 +570,6 @@ void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
 
 bool ExtMemoryPool::softCachesCleanup()
 {
-    // TODO: cleanup small objects as well
     return loc.regularCleanup(&backend);
 }
 
@@ -579,8 +577,9 @@ bool ExtMemoryPool::hardCachesCleanup()
 {
     // thread-local caches must be cleaned before LOC,
     // because object from thread-local cache can be released to LOC
-    bool tlCaches = releaseTLCaches(), locCaches = loc.cleanAll(&backend);
-    return tlCaches || locCaches;
+    bool ret = releaseAllLocalCaches();
+    ret |= loc.cleanAll(&backend);
+    return ret;
 }
 
 
diff --git a/src/tbbmalloc/lin32-tbbmalloc-export.def b/src/tbbmalloc/lin32-tbbmalloc-export.def
index 58c95cf..04f7a8c 100644
--- a/src/tbbmalloc/lin32-tbbmalloc-export.def
+++ b/src/tbbmalloc/lin32-tbbmalloc-export.def
@@ -44,6 +44,7 @@ __TBB_internal_realloc;
 __TBB_internal_posix_memalign;
 scalable_msize;
 scalable_allocation_mode;
+scalable_allocation_command;
 
 /* memory pool stuff */
 _ZN3rml10pool_resetEPNS_10MemoryPoolE;
diff --git a/src/tbbmalloc/lin64-tbbmalloc-export.def b/src/tbbmalloc/lin64-tbbmalloc-export.def
index ef6cb6f..d6e591f 100644
--- a/src/tbbmalloc/lin64-tbbmalloc-export.def
+++ b/src/tbbmalloc/lin64-tbbmalloc-export.def
@@ -44,6 +44,7 @@ __TBB_internal_realloc;
 __TBB_internal_posix_memalign;
 scalable_msize;
 scalable_allocation_mode;
+scalable_allocation_command;
 
 /* memory pool stuff */
 _ZN3rml11pool_createElPKNS_13MemPoolPolicyE;
diff --git a/src/tbbmalloc/lin64ipf-tbbmalloc-export.def b/src/tbbmalloc/lin64ipf-tbbmalloc-export.def
index ef6cb6f..d6e591f 100644
--- a/src/tbbmalloc/lin64ipf-tbbmalloc-export.def
+++ b/src/tbbmalloc/lin64ipf-tbbmalloc-export.def
@@ -44,6 +44,7 @@ __TBB_internal_realloc;
 __TBB_internal_posix_memalign;
 scalable_msize;
 scalable_allocation_mode;
+scalable_allocation_command;
 
 /* memory pool stuff */
 _ZN3rml11pool_createElPKNS_13MemPoolPolicyE;
diff --git a/src/tbbmalloc/mac32-tbbmalloc-export.def b/src/tbbmalloc/mac32-tbbmalloc-export.def
index 4eb11f7..5c60c6f 100644
--- a/src/tbbmalloc/mac32-tbbmalloc-export.def
+++ b/src/tbbmalloc/mac32-tbbmalloc-export.def
@@ -36,6 +36,7 @@ _scalable_aligned_realloc
 _scalable_aligned_free
 _scalable_msize
 _scalable_allocation_mode
+_scalable_allocation_command
 /* memory pool stuff */
 __ZN3rml11pool_createElPKNS_13MemPoolPolicyE
 __ZN3rml14pool_create_v1ElPKNS_13MemPoolPolicyEPPNS_10MemoryPoolE
diff --git a/src/tbbmalloc/mac64-tbbmalloc-export.def b/src/tbbmalloc/mac64-tbbmalloc-export.def
index 4eb11f7..5c60c6f 100644
--- a/src/tbbmalloc/mac64-tbbmalloc-export.def
+++ b/src/tbbmalloc/mac64-tbbmalloc-export.def
@@ -36,6 +36,7 @@ _scalable_aligned_realloc
 _scalable_aligned_free
 _scalable_msize
 _scalable_allocation_mode
+_scalable_allocation_command
 /* memory pool stuff */
 __ZN3rml11pool_createElPKNS_13MemPoolPolicyE
 __ZN3rml14pool_create_v1ElPKNS_13MemPoolPolicyEPPNS_10MemoryPoolE
diff --git a/src/tbbmalloc/tbbmalloc_internal.h b/src/tbbmalloc/tbbmalloc_internal.h
index 64853da..68ff30d 100644
--- a/src/tbbmalloc/tbbmalloc_internal.h
+++ b/src/tbbmalloc/tbbmalloc_internal.h
@@ -80,6 +80,10 @@
 #define ASSERT_TEXT NULL
 
 #define COLLECT_STATISTICS ( MALLOC_DEBUG && MALLOCENV_COLLECT_STATISTICS )
+#ifndef USE_INTERNAL_TID
+#define USE_INTERNAL_TID COLLECT_STATISTICS
+#endif
+
 #include "Statistics.h"
 
 // call yield for whitebox testing, skip in real library
@@ -217,6 +221,24 @@ public:
     }
 };
 
+
+// The part of thread-specific data that can be modified by other threads.
+// Such modifications must be protected by AllLocalCaches::listLock.
+struct TLSRemote { // link node: the two pointers below chain instances in both directions
+    TLSRemote *next,  // following node (NOTE(review): presumably NULL at the end of the chain — confirm)
+              *prev;  // preceding node (NOTE(review): presumably NULL at the start of the chain — confirm)
+};
+
+// The list of all thread-local data; supporting cleanup of thread caches
+class AllLocalCaches { // registry of per-thread TLSRemote nodes, guarded by listLock
+    TLSRemote  *head; // NOTE(review): presumably the first node of the next/prev chain — confirm in the definition
+    MallocMutex listLock; // protects operations in the list
+public:
+    void registerThread(TLSRemote *tls); // NOTE(review): presumably links tls into the list — body not visible here
+    void unregisterThread(TLSRemote *tls); // NOTE(review): presumably unlinks tls from the list — body not visible here
+    bool cleanup(ExtMemoryPool *extPool); // NOTE(review): presumably true when something was released — confirm
+};
+
 /* cache blocks in range [MinSize; MaxSize) in bins with CacheStep
  TooLargeFactor -- when cache size treated "too large" in comparison to user data size
  OnMissFactor -- If cache miss occurred and cache was cleaned,
@@ -278,8 +300,8 @@ class LargeObjectCacheImpl {
         size_t            usedSize,
   /* total size of all objects cached in the bin */
                           cachedSize;
-  /* time of last hit for the bin */
-        intptr_t          lastHit;
+  /* mean time of presence of block in the bin before successful reuse */
+        intptr_t          meanHitRange;
   /* time of last get called for the bin */
         uintptr_t         lastGet;
 
@@ -293,7 +315,7 @@ class LargeObjectCacheImpl {
         inline LargeMemoryBlock *get(size_t size, uintptr_t currTime, bool *setNonEmpty);
         void decreaseThreshold() {
             if (ageThreshold)
-                ageThreshold = (ageThreshold + lastHit)/2;
+                ageThreshold = (ageThreshold + meanHitRange)/2;
         }
         void updateBinsSummary(BinsSummary *binsSummary) const {
             binsSummary->update(usedSize, cachedSize);
@@ -350,7 +372,8 @@ public:
 class LargeObjectCache {
     static const size_t minLargeSize =  8*1024,
                         maxLargeSize =  8*1024*1024,
-                        maxHugeSize = 128*1024*1024;
+    // There are benchmarks of interest that should work well with objects of this size
+                        maxHugeSize = 129*1024*1024;
 public:
     // Difference between object sizes in large block bins
     static const uint32_t largeBlockCacheStep =  8*1024,
@@ -695,7 +718,6 @@ class AllLargeBlocksList {
     MallocMutex       largeObjLock;
     LargeMemoryBlock *loHead;
 public:
-    LargeMemoryBlock *getHead() { return loHead; }
     void add(LargeMemoryBlock *lmb);
     void remove(LargeMemoryBlock *lmb);
     template<bool poolDestroy> void releaseAll(Backend *backend);
@@ -713,10 +735,12 @@ struct ExtMemoryPool {
     size_t            granularity;
     bool              keepAllMemory,
                       delayRegsReleasing,
+    // TODO: implement fixedPool with calling rawFree on destruction
                       fixedPool;
     TLSKey            tlsPointerKey;  // per-pool TLS key
 
     LargeObjectCache  loc;
+    AllLocalCaches    allLocalCaches;
 
     bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree,
               size_t granularity, bool keepAllMemory, bool fixedPool);
@@ -727,8 +751,7 @@ struct ExtMemoryPool {
 
      // true if something has beed released
     bool softCachesCleanup();
-    bool releaseTLCaches();
-    // TODO: to release all thread's pools, not just current thread
+    bool releaseAllLocalCaches();
     bool hardCachesCleanup();
     void reset() {
         loc.reset();
diff --git a/src/tbbmalloc/win32-gcc-tbbmalloc-export.def b/src/tbbmalloc/win32-gcc-tbbmalloc-export.def
index bbf6d71..53849a2 100644
--- a/src/tbbmalloc/win32-gcc-tbbmalloc-export.def
+++ b/src/tbbmalloc/win32-gcc-tbbmalloc-export.def
@@ -40,6 +40,7 @@ safer_scalable_free;
 safer_scalable_realloc;
 scalable_msize;
 scalable_allocation_mode;
+scalable_allocation_command;
 safer_scalable_msize;
 safer_scalable_aligned_realloc;
 /* memory pool stuff */
diff --git a/src/tbbmalloc/win32-tbbmalloc-export.def b/src/tbbmalloc/win32-tbbmalloc-export.def
index 43c97d7..6155981 100644
--- a/src/tbbmalloc/win32-tbbmalloc-export.def
+++ b/src/tbbmalloc/win32-tbbmalloc-export.def
@@ -39,6 +39,7 @@ safer_scalable_free
 safer_scalable_realloc
 scalable_msize
 scalable_allocation_mode
+scalable_allocation_command
 safer_scalable_msize
 safer_scalable_aligned_realloc
 ?pool_create at rml@@YAPAVMemoryPool at 1@HPBUMemPoolPolicy at 1@@Z
diff --git a/src/tbbmalloc/win64-gcc-tbbmalloc-export.def b/src/tbbmalloc/win64-gcc-tbbmalloc-export.def
index 646f338..7938ab0 100644
--- a/src/tbbmalloc/win64-gcc-tbbmalloc-export.def
+++ b/src/tbbmalloc/win64-gcc-tbbmalloc-export.def
@@ -40,6 +40,7 @@ safer_scalable_free;
 safer_scalable_realloc;
 scalable_msize;
 scalable_allocation_mode;
+scalable_allocation_command;
 safer_scalable_msize;
 safer_scalable_aligned_realloc;
 /* memory pool stuff */
diff --git a/src/tbbmalloc/win64-tbbmalloc-export.def b/src/tbbmalloc/win64-tbbmalloc-export.def
index 7b18b03..0eccd62 100644
--- a/src/tbbmalloc/win64-tbbmalloc-export.def
+++ b/src/tbbmalloc/win64-tbbmalloc-export.def
@@ -39,6 +39,7 @@ safer_scalable_free
 safer_scalable_realloc
 scalable_msize
 scalable_allocation_mode
+scalable_allocation_command
 safer_scalable_msize
 safer_scalable_aligned_realloc
 ; memory pool stuff
diff --git a/src/tbbproxy/tbbproxy.cpp b/src/tbbproxy/tbbproxy.cpp
index 49e4332..ab18bb2 100644
--- a/src/tbbproxy/tbbproxy.cpp
+++ b/src/tbbproxy/tbbproxy.cpp
@@ -382,7 +382,7 @@ static tbb::runtime_loader::error_code _load( char const * dll_name, int min_ver
     // First load the library.
     _handle = dlopen( dll_name, RTLD_NOW );
     if ( _handle == NULL ) {
-        char * msg = dlerror();
+        const char * msg = dlerror();
         code = error( mode, tbb::runtime_loader::ec_no_lib, "Loading \"%s\" failed; system error: %s", dll_name, msg );
         goto error;
     } // if
diff --git a/src/test/harness.h b/src/test/harness.h
index 5c93e87..8b2dfd5 100644
--- a/src/test/harness.h
+++ b/src/test/harness.h
@@ -130,113 +130,123 @@ int TestMain ();
 #endif // HARNESS_USE_RUNTIME_LOADER
 
 #if !HARNESS_NO_ASSERT
+    #include "harness_assert.h"
+    #if TEST_USES_TBB
+        #include <tbb/tbb_stddef.h> /*set_assertion_handler*/
+
+        struct InitReporter { // constructor wires harness error reporting into TBB
+            InitReporter() {
+        #if TBB_USE_ASSERT
+                tbb::set_assertion_handler(ReportError); // TBB assertion failures are routed to ReportError
+        #endif
+            ASSERT_WARNING(TBB_INTERFACE_VERSION <= tbb::TBB_runtime_interface_version(), "runtime version mismatch"); // warns when the headers are newer than the loaded runtime
+        }
+        };
+        static InitReporter InitReportError; // file-scope instance; constructing it performs the setup above
+    #endif
 
-#include "harness_assert.h"
-#if TEST_USES_TBB
-#include <tbb/tbb_stddef.h> /*set_assertion_handler*/
-
-struct InitReporter {
-    InitReporter() {
-#if TBB_USE_ASSERT
-        tbb::set_assertion_handler(ReportError);
-#endif
-        ASSERT_WARNING(TBB_INTERFACE_VERSION <= tbb::TBB_runtime_interface_version(), "runtime version mismatch");
+    typedef void (*test_error_extra_t)(void);
+    static test_error_extra_t ErrorExtraCall;
+    //! Set additional handler to process failed assertions
+    void SetHarnessErrorProcessing( test_error_extra_t extra_call ) {
+        ErrorExtraCall = extra_call;
     }
-};
-static InitReporter InitReportError;
-#endif
-
-typedef void (*test_error_extra_t)(void);
-static test_error_extra_t ErrorExtraCall;
-//! Set additional handler to process failed assertions
-void SetHarnessErrorProcessing( test_error_extra_t extra_call ) {
-    ErrorExtraCall = extra_call;
-}
 
-//! Reports errors issued by failed assertions
-void ReportError( const char* filename, int line, const char* expression, const char * message ) {
-#if BACKTRACE_FUNCTION_AVAILABLE
-    const int sz = 100; // max number of frames to capture
-    void *buff[sz];
-    int n = backtrace(buff, sz);
-    REPORT("Call stack info (%d):\n", n);
-    backtrace_symbols_fd(buff, n, fileno(stdout));
-#elif __SUNPRO_CC
-    REPORT("Call stack info:\n");
-    printstack(fileno(stdout));
-#elif _WIN32_WINNT > 0x0501 && _MSC_VER && !__TBB_WIN8UI_SUPPORT
-    const int sz = 62; // XP limitation for number of frames
-    void *buff[sz];
-    int n = CaptureStackBackTrace(0, sz, buff, NULL);
-    REPORT("Call stack info (%d):\n", n);
-    static LONG once = 0;
-    if( !InterlockedExchange(&once, 1) )
-        SymInitialize(GetCurrentProcess(), NULL, TRUE);
-    const int len = 255; // just some reasonable string buffer size
-    union { SYMBOL_INFO sym; char pad[sizeof(SYMBOL_INFO)+len]; };
-    sym.MaxNameLen = len;
-    sym.SizeOfStruct = sizeof( SYMBOL_INFO );
-    DWORD64 offset;
-    for(int i = 1; i < n; i++) { // skip current frame
-        if(!SymFromAddr( GetCurrentProcess(), DWORD64(buff[i]), &offset, &sym )) {
-            sym.Address = ULONG64(buff[i]); offset = 0; sym.Name[0] = 0;
+    //! Reports errors issued by failed assertions
+    void ReportError( const char* filename, int line, const char* expression, const char * message ) {
+    #if BACKTRACE_FUNCTION_AVAILABLE
+        const int sz = 100; // max number of frames to capture
+        void *buff[sz];
+        int n = backtrace(buff, sz);
+        REPORT("Call stack info (%d):\n", n);
+        backtrace_symbols_fd(buff, n, fileno(stdout));
+    #elif __SUNPRO_CC
+        REPORT("Call stack info:\n");
+        printstack(fileno(stdout));
+    #elif _WIN32_WINNT > 0x0501 && _MSC_VER && !__TBB_WIN8UI_SUPPORT
+        const int sz = 62; // XP limitation for number of frames
+        void *buff[sz];
+        int n = CaptureStackBackTrace(0, sz, buff, NULL);
+        REPORT("Call stack info (%d):\n", n);
+        static LONG once = 0;
+        if( !InterlockedExchange(&once, 1) )
+            SymInitialize(GetCurrentProcess(), NULL, TRUE);
+        const int len = 255; // just some reasonable string buffer size
+        union { SYMBOL_INFO sym; char pad[sizeof(SYMBOL_INFO)+len]; };
+        sym.MaxNameLen = len;
+        sym.SizeOfStruct = sizeof( SYMBOL_INFO );
+        DWORD64 offset;
+        for(int i = 1; i < n; i++) { // skip current frame
+            if(!SymFromAddr( GetCurrentProcess(), DWORD64(buff[i]), &offset, &sym )) {
+                sym.Address = ULONG64(buff[i]); offset = 0; sym.Name[0] = 0;
+            }
+            REPORT("[%d] %016I64LX+%04I64LX: %s\n", i, sym.Address, offset, sym.Name); //TODO: print module name
         }
-        REPORT("[%d] %016I64LX+%04I64LX: %s\n", i, sym.Address, offset, sym.Name); //TODO: print module name
-    }
-#endif /*BACKTRACE_FUNCTION_AVAILABLE*/
+    #endif /*BACKTRACE_FUNCTION_AVAILABLE*/
 
-#if __TBB_ICL_11_1_CODE_GEN_BROKEN
-    printf("%s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
-#else
-    REPORT_FATAL_ERROR("%s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
-#endif
+    #if __TBB_ICL_11_1_CODE_GEN_BROKEN
+        printf("%s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
+    #else
+        REPORT_FATAL_ERROR("%s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
+    #endif
 
-    if( ErrorExtraCall )
-        (*ErrorExtraCall)();
-    fflush(stdout); fflush(stderr);
-#if HARNESS_TERMINATE_ON_ASSERT
-    TerminateProcess(GetCurrentProcess(), 1);
-#elif HARNESS_EXIT_ON_ASSERT
-    exit(1);
-#elif HARNESS_CONTINUE_ON_ASSERT
-    // continue testing
-#elif _MSC_VER && _DEBUG
-    // aligned with tbb_assert_impl.h behavior
-    if(1 == _CrtDbgReport(_CRT_ASSERT, filename, line, NULL, "%s\r\n%s", expression, message?message:""))
-        _CrtDbgBreak();
-#else
-    abort();
-#endif /* HARNESS_EXIT_ON_ASSERT */
-}
-//! Reports warnings issued by failed warning assertions
-void ReportWarning( const char* filename, int line, const char* expression, const char * message ) {
-    REPORT("Warning: %s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
-}
+        if( ErrorExtraCall )
+            (*ErrorExtraCall)();
+        fflush(stdout); fflush(stderr);
+    #if HARNESS_TERMINATE_ON_ASSERT
+        TerminateProcess(GetCurrentProcess(), 1);
+    #elif HARNESS_EXIT_ON_ASSERT
+        exit(1);
+    #elif HARNESS_CONTINUE_ON_ASSERT
+        // continue testing
+    #elif _MSC_VER && _DEBUG
+        // aligned with tbb_assert_impl.h behavior
+        if(1 == _CrtDbgReport(_CRT_ASSERT, filename, line, NULL, "%s\r\n%s", expression, message?message:""))
+            _CrtDbgBreak();
+    #else
+        abort();
+    #endif /* HARNESS_EXIT_ON_ASSERT */
+    }
+    //! Reports warnings issued by failed warning assertions
+    void ReportWarning( const char* filename, int line, const char* expression, const char * message ) {
+        REPORT("Warning: %s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
+    }
 
 #else /* !HARNESS_NO_ASSERT */
 
-#define ASSERT(p,msg) (Harness::suppress_unused_warning(p), (void)0)
-#define ASSERT_WARNING(p,msg) (Harness::suppress_unused_warning(p), (void)0)
+    #define ASSERT(p,msg) (Harness::suppress_unused_warning(p), (void)0)
+    #define ASSERT_WARNING(p,msg) (Harness::suppress_unused_warning(p), (void)0)
 
 #endif /* !HARNESS_NO_ASSERT */
 
-//TODO: unify with utility::internal::array_length from examples common utilities
-template<typename T, size_t N>
-inline size_t array_length(const T(&)[N])
-{
-   return N;
-}
+namespace Harness {
+    //TODO: unify with utility::internal::array_length from examples common utilities
+    template<typename T, size_t N>
+    inline size_t array_length(const T(&)[N])
+    {
+       return N;
+    }
 
-//TODO: remove this #if __TBB_INITIALIZER_LISTS_PRESENT
-//it looks like all other compilers except gcc issue warnings/errors then they see
-//declaration of zero sized array
-#if __TBB_INITIALIZER_LISTS_PRESENT
-template<typename T>
-inline size_t array_length(const T[0])
-{
-   return 0;
-}
-#endif //__TBB_INITIALIZER_LISTS_PRESENT
+    //TODO: remove this #if __TBB_INITIALIZER_LISTS_PRESENT
+    //it looks like all other compilers except gcc issue warnings/errors then they see
+    //declaration of zero sized array
+    #if __TBB_INITIALIZER_LISTS_PRESENT
+        template<typename T>
+        inline size_t array_length(const T[0])
+        {
+           return 0;
+        }
+    #endif //__TBB_INITIALIZER_LISTS_PRESENT
+} //namespace Harness
+
+#if TEST_USES_TBB
+    #include "tbb/blocked_range.h"
+
+    namespace Harness {
+        template<typename T, size_t N> // N is deduced from the array reference argument
+        tbb::blocked_range<T*> make_blocked_range( T(& array)[N]){ return tbb::blocked_range<T*>(array, array + N);} // range over [array, array + N)
+    }
+#endif
 
 #if !HARNESS_NO_PARSE_COMMAND_LINE
 
diff --git a/src/test/harness_defs.h b/src/test/harness_defs.h
index 045a3f4..126ad0e 100644
--- a/src/test/harness_defs.h
+++ b/src/test/harness_defs.h
@@ -141,6 +141,10 @@
 namespace Harness {
     //! Utility template function to prevent "unused" warnings by various compilers.
     template<typename T> void suppress_unused_warning( const T& ) {}
+
+    //TODO: unify with one in tbb::internal
+    //! Utility helper structure to ease overload resolution
+    template<int > struct int_to_type {};
 }
 
 #endif /* __TBB_harness_defs_H */
diff --git a/src/test/harness_tsx.h b/src/test/harness_tsx.h
new file mode 100644
index 0000000..4f22291
--- /dev/null
+++ b/src/test/harness_tsx.h
@@ -0,0 +1,89 @@
+/*
+    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+// Header that sets HAVE_TSX if TSX may be available (x86 32- and 64-bit targets)
+#define HAVE_TSX ( __TBB_x86_32 || __TBB_x86_64 )
+
+#if HAVE_TSX 
+
+// TODO: extend it for other compilers when we add API for XTEST
+#if __INTEL_COMPILER
+
+#include "harness_defs.h"
+
+inline static bool IsInsideTx() // true when the emitted 0F 01 D6 instruction (XTEST, per the TODO above) leaves ZF clear
+{
+#if _MSC_VER
+    __int8 res = 0;
+    __asm {
+      _asm _emit 0x0F 
+      _asm _emit 0x01 
+      _asm _emit 0xD6
+      _asm setz ah
+      _asm mov  res, ah
+    }
+    return res==0; // res holds the ZF value captured by setz
+#else
+    int8_t res = 0; // receives the ZF value captured by setz below
+    __asm__ __volatile__ (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
+                          "setz %0" : "=r"(res) : : "memory" );
+#endif
+    return res==0; // res==0 means ZF was clear after the instruction
+}
+
+#if _MSC_VER
+#include <intrin.h> // for __cpuid
+#endif
+bool have_TSX() { // true when CPUID leaf 7 / sub-leaf 0 reports HLE in EBX; asserts that RTM is reported with it
+    bool result = false;
+    const int hle_ebx_mask = 1<<4;  // EBX bit tested for HLE
+    const int rtm_ebx_mask = 1<<11; // EBX bit tested for RTM
+#if _MSC_VER
+    int info[4] = {0,0,0,0};
+    const int EBX = 1; // index of EBX in the __cpuidex output array
+    __cpuidex(info, 7, 0);
+    result = (info[EBX] & hle_ebx_mask)!=0;
+    if( result ) ASSERT( (info[EBX] & rtm_ebx_mask)!=0, NULL );
+#elif __GNUC__
+    int EBX = 0;
+    int32_t reg_eax = 7; // CPUID leaf
+    int32_t reg_ecx = 0; // CPUID sub-leaf; overwritten by cpuid, hence the "+c" constraint
+    __asm__ __volatile__ ( "movl %%ebx, %%esi\n" // park ebx in esi while cpuid overwrites it
+                           "cpuid\n"
+                           "movl %%ebx, %0\n"
+                           "movl %%esi, %%ebx\n" // NOTE(review): a 32-bit restore zero-extends rbx on x86-64 — confirm this is safe there
+                           : "=a"(EBX), "+c"(reg_ecx) : "0" (reg_eax) : "esi", "edx" ); // cpuid also writes ecx and edx
+    result = (EBX & hle_ebx_mask)!=0 ;
+    if( result ) ASSERT( (EBX & rtm_ebx_mask)!=0, NULL );
+#endif
+    return result;
+}
+
+#endif /* __INTEL_COMPILER */
+
+#endif /* HAVE_TSX */
diff --git a/src/test/test_ScalableAllocator.cpp b/src/test/test_ScalableAllocator.cpp
index 3d48780..eb1b3b9 100644
--- a/src/test/test_ScalableAllocator.cpp
+++ b/src/test/test_ScalableAllocator.cpp
@@ -32,7 +32,7 @@
 #define TBB_PREVIEW_MEMORY_POOL 1
 
 #include "harness_assert.h"
-#if __linux__  && __ia64__
+#if !__TBB_SOURCE_DIRECTLY_INCLUDED
 // Currently pools high-level interface has dependency to TBB library
 // to get atomics. For sake of testing add rudementary implementation of them.
 #include "harness_tbb_independence.h"
@@ -115,11 +115,16 @@ void TestZeroSpaceMemoryPool() { }
 struct FixedPool {
     void  *buf;
     size_t size;
-    FixedPool(void *buf, size_t size) : buf(buf), size(size) {}
+    bool   used;
+    FixedPool(void *buf, size_t size) : buf(buf), size(size), used(false) {}
 };
 
 static void *fixedBufGetMem(intptr_t pool_id, size_t &bytes)
 {
+    if (((FixedPool*)pool_id)->used)
+        return NULL;
+
+    ((FixedPool*)pool_id)->used = true;
     bytes = ((FixedPool*)pool_id)->size;
     return ((FixedPool*)pool_id)->buf;
 }
diff --git a/src/test/test_ScalableAllocator_STL.cpp b/src/test/test_ScalableAllocator_STL.cpp
index 48c1aca..6c8529c 100644
--- a/src/test/test_ScalableAllocator_STL.cpp
+++ b/src/test/test_ScalableAllocator_STL.cpp
@@ -33,7 +33,7 @@
 #define TBB_PREVIEW_MEMORY_POOL 1
 
 #include "harness_assert.h"
-#if __linux__  && __ia64__
+#if !__TBB_SOURCE_DIRECTLY_INCLUDED
 // Currently pools high-level interface has dependency to TBB library
 // to get atomics. For sake of testing add rudementary implementation of them.
 #include "harness_tbb_independence.h"
diff --git a/src/test/test_atomic.cpp b/src/test/test_atomic.cpp
index 59ab109..d34722a 100644
--- a/src/test/test_atomic.cpp
+++ b/src/test/test_atomic.cpp
@@ -401,11 +401,16 @@ void TestConstExprInitializationIsTranslationTime(){
     ASSERT(a == 8,ct_init_failed_msg);
 
     constexpr tbb::atomic<test_constexpr_initialization_helper::white_box_ad_hoc_type> ct_atomic(10);
+    //for some unknown reason clang does not manage to handle the enum syntax here
+#if __clang__
+    constexpr int ct_atomic_value_ten = (int)ct_atomic;
+#else
     enum {ct_atomic_value_ten = (int)ct_atomic};
+#endif
     __TBB_STATIC_ASSERT(ct_atomic_value_ten == 10, "translation time init failed?");
     ASSERT(ct_atomic_value_ten == 10,ct_init_failed_msg);
     int array[ct_atomic_value_ten];
-    ASSERT(array_length(array) == 10,ct_init_failed_msg);
+    ASSERT(Harness::array_length(array) == 10,ct_init_failed_msg);
 }
 
 #include <string>
diff --git a/src/test/test_buffer_node.cpp b/src/test/test_buffer_node.cpp
index 178825a..56b6042 100644
--- a/src/test/test_buffer_node.cpp
+++ b/src/test/test_buffer_node.cpp
@@ -421,7 +421,7 @@ int test_serial() {
 
 int TestMain() { 
     tbb::tick_count start = tbb::tick_count::now(), stop;
-    for (int p = 1; p < 4; ++p) {
+    for (int p = 2; p <= 4; ++p) {
         tbb::task_scheduler_init init(p);
         test_serial<int>();
         test_parallel<int>(p);
diff --git a/src/test/test_cilk_interop.cpp b/src/test/test_cilk_interop.cpp
index d5506e5..6bd560b 100644
--- a/src/test/test_cilk_interop.cpp
+++ b/src/test/test_cilk_interop.cpp
@@ -43,6 +43,7 @@ static const int P_outer = 4;
 static const int P_nested = 2;
 
 #include <cilk/cilk.h>
+#include <cilk/cilk_api.h>
 #define private public
 #include "tbb/task.h"
 #undef private
diff --git a/src/test/test_concurrent_hash_map.cpp b/src/test/test_concurrent_hash_map.cpp
index 14b0965..b22a539 100644
--- a/src/test/test_concurrent_hash_map.cpp
+++ b/src/test/test_concurrent_hash_map.cpp
@@ -917,6 +917,21 @@ void TestExceptions() {
 }
 #endif /* TBB_USE_EXCEPTIONS */
 
+
+#if __TBB_INITIALIZER_LISTS_PRESENT
+#include "test_initializer_list.h"
+
+void TestInitList(){ // runs the shared initializer_list checks against concurrent_hash_map<int,int>
+    using namespace initializer_list_support_tests;
+    REMARK("testing initializer_list methods \n");
+
+    typedef tbb::concurrent_hash_map<int,int>::value_type value_type;
+    std::initializer_list<value_type > pairs_il = {{1,1},{2,2},{3,3},{4,4},{5,5}}; // five key/value pairs
+
+    TestInitListSupportWithoutAssign<tbb::concurrent_hash_map<int,int> >(pairs_il); // helper comes from test_initializer_list.h (not visible here)
+    TestInitListSupportWithoutAssign<tbb::concurrent_hash_map<int,int> >({}); // same helper with an empty list
+}
+#endif //if __TBB_INITIALIZER_LISTS_PRESENT
 //------------------------------------------------------------------------
 // Test driver
 //------------------------------------------------------------------------
@@ -936,6 +951,10 @@ int TestMain () {
     TestRehash();
     TestAssignment();
     TestIteratorsAndRanges();
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    TestInitList();
+#endif //__TBB_INITIALIZER_LISTS_PRESENT
+
 #if TBB_USE_EXCEPTIONS
     TestExceptions();
 #endif /* TBB_USE_EXCEPTIONS */
diff --git a/src/test/test_concurrent_priority_queue.cpp b/src/test/test_concurrent_priority_queue.cpp
index b91ad9c..e239044 100644
--- a/src/test/test_concurrent_priority_queue.cpp
+++ b/src/test/test_concurrent_priority_queue.cpp
@@ -183,15 +183,10 @@ bool operator==(tbb::concurrent_priority_queue<element_type> const& lhs, range c
     return to_vector()(lhs) == std::vector<element_type>(rhs.begin(),rhs.end());
 }
 
-//TODO: move this to harness
-template<typename T, size_t N>
-tbb::blocked_range<T*> make_blocked_range( T(& array)[N]){ return tbb::blocked_range<T*>(array, array + N);}
-
-
 void TestToVector(){
     using equality_comparison_helpers::to_vector;
     int array[] = {1,5,6,8,4,7};
-    tbb::blocked_range<int *> range =  make_blocked_range(array);
+    tbb::blocked_range<int *> range =  Harness::make_blocked_range(array);
     std::vector<int> source(range.begin(),range.end());
     tbb::concurrent_priority_queue<int> q(source.begin(),source.end());
     std::vector<int> from_cpq = to_vector()(q);
@@ -526,18 +521,11 @@ void TestCpqOnNThreads(int nThreads) {
 #if __TBB_INITIALIZER_LISTS_PRESENT
 #include "test_initializer_list.h"
 
-#define __TBB_CPQ_TEST_INIT_SEQ {1,2,3,4,5}
-__TBB_TEST_INIT_LIST_SUITE(TestInitListIml,tbb::concurrent_priority_queue,char,__TBB_CPQ_TEST_INIT_SEQ)
-#undef __TBB_CPQ_TEST_INIT_SEQ
-
-#define __TBB_CPQ_TEST_EMPTY_INIT_SEQ {}
-__TBB_TEST_INIT_LIST_SUITE(TestEmptyInitListIml,tbb::concurrent_priority_queue,int,__TBB_CPQ_TEST_EMPTY_INIT_SEQ)
-#undef __TBB_CPQ_TEST_EMPTY_INIT_SEQ
-
 void TestInitList(){
     REMARK("testing initializer_list methods \n");
-    TestEmptyInitListIml();
-    TestInitListIml();
+    using namespace initializer_list_support_tests;
+    TestInitListSupport<tbb::concurrent_priority_queue<char> >({1,2,3,4,5});
+    TestInitListSupport<tbb::concurrent_priority_queue<int> >({});
 }
 #endif //if __TBB_INITIALIZER_LISTS_PRESENT
 
diff --git a/src/test/test_concurrent_unordered.cpp b/src/test/test_concurrent_unordered.cpp
index adb188a..f7053ca 100644
--- a/src/test/test_concurrent_unordered.cpp
+++ b/src/test/test_concurrent_unordered.cpp
@@ -211,6 +211,63 @@ struct SpecialTests <MyMultiMap>
     }
 };
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+//These operator== overloads are used implicitly in test_initializer_list.h.
+//For some unknown reason clang is not able to find them if they are declared after the
+//inclusion of test_initializer_list.h.
+template<typename container_type> // any container exposing size(), begin() and end()
+bool equal_containers(container_type const& lhs, container_type const& rhs){ // true when both hold equal elements in the same iteration order
+    if (lhs.size() != rhs.size()){
+        return false; // different sizes can never match; also guarantees rhs is long enough for std::equal
+    }
+    return std::equal(lhs.begin(),lhs.end(),rhs.begin()); // walk rhs alongside lhs (comparing lhs with itself is always true)
+}
+
+template<typename T> // equality for concurrent_unordered_set
+bool operator==(tbb::concurrent_unordered_set<T> const& lhs, tbb::concurrent_unordered_set<T> const& rhs){
+    return equal_containers(lhs,rhs); // forwards to the generic helper
+}
+
+template<typename T> // equality for concurrent_unordered_multiset
+bool operator==(tbb::concurrent_unordered_multiset<T> const& lhs, tbb::concurrent_unordered_multiset<T> const& rhs){
+    return equal_containers(lhs,rhs); // forwards to the generic helper
+}
+
+template<typename Key, typename Value> // equality for concurrent_unordered_map
+bool operator==(tbb::concurrent_unordered_map<Key,Value> const& lhs, tbb::concurrent_unordered_map<Key,Value> const& rhs){
+    return equal_containers(lhs,rhs); // forwards to the generic helper
+}
+
+template<typename Key, typename Value> // equality for concurrent_unordered_multimap
+bool operator==(tbb::concurrent_unordered_multimap<Key,Value> const& lhs, tbb::concurrent_unordered_multimap<Key,Value> const& rhs){
+    return equal_containers(lhs,rhs); // forwards to the generic helper
+}
+
+#include "test_initializer_list.h"
+
+void TestInitList(){ // runs the shared initializer_list checks against the four concurrent unordered containers
+    using namespace initializer_list_support_tests;
+    REMARK("testing initializer_list methods \n");
+
+    std::initializer_list<int> il = {1,2,3,4,5}; // element list for the set-like containers
+
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_set<int> >(il); // helper comes from test_initializer_list.h (not visible here)
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_set<int> >({}); // same helper with an empty list
+
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_multiset<int> >(il);
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_multiset<int> >({});
+
+    typedef tbb::concurrent_unordered_map<int,int>::value_type value_type;
+    std::initializer_list<value_type > pairs_il = {{1,1},{2,2},{3,3},{4,4},{5,5}}; // key/value list for the map-like containers
+
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_map<int,int> >(pairs_il);
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_map<int,int> >({});
+
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_multimap<int,int> >(pairs_il); // value_type typedef'd from the map is reused for the multimap
+    TestInitListSupportWithoutAssign<tbb::concurrent_unordered_multimap<int,int> >({});
+}
+#endif //if __TBB_INITIALIZER_LISTS_PRESENT
+
 template<typename T>
 void test_basic(const char * str)
 {
@@ -719,6 +776,10 @@ void TEST_INITIALIZATION_TIME_OPERATIONS_NAME(){
 
 #if !__TBB_TEST_SECONDARY
 int TestMain () {
+    #if __TBB_INITIALIZER_LISTS_PRESENT
+        TestInitList();
+    #endif
+
     test_machine();
     test_basic<MyMap>("concurrent unordered Map");
     test_concurrent<MyMap>("concurrent unordered Map");
diff --git a/src/test/test_concurrent_vector.cpp b/src/test/test_concurrent_vector.cpp
index d9d381b..3b6f4d5 100644
--- a/src/test/test_concurrent_vector.cpp
+++ b/src/test/test_concurrent_vector.cpp
@@ -642,24 +642,17 @@ void TestConcurrentGrowBy( int nthread ) {
 //TODO: move this to more appropriate place, smth like test_harness.cpp
 void TestArrayLength(){
     int five_elementh_array[5] = {0};
-    ASSERT(array_length(five_elementh_array)==5,"array_length failed to determine length of non empty non dynamic array");
+    ASSERT(Harness::array_length(five_elementh_array)==5,"array_length failed to determine length of non empty non dynamic array");
 }
 
 #if __TBB_INITIALIZER_LISTS_PRESENT
 #include "test_initializer_list.h"
 
-#define __TBB_CVECTOR_TEST_INIT_SEQ {1,2,3,4,5}
-__TBB_TEST_INIT_LIST_SUITE(TestInitListIml,tbb::concurrent_vector,char,__TBB_CVECTOR_TEST_INIT_SEQ )
-#undef __TBB_CVECTOR_TEST_INIT_SEQ
-
-#define __TBB_CVECTOR_TEST_EMPTY_INIT_SEQ {}
-__TBB_TEST_INIT_LIST_SUITE(TestEmptyInitListIml,tbb::concurrent_vector,int,__TBB_CVECTOR_TEST_EMPTY_INIT_SEQ )
-#undef __TBB_CVECTOR_TEST_EMPTY_INIT_SEQ
-
 void TestInitList(){
     REMARK("testing initializer_list methods \n");
-    TestEmptyInitListIml();
-    TestInitListIml();
+    using namespace initializer_list_support_tests;
+    TestInitListSupport<tbb::concurrent_vector<char> >({1,2,3,4,5});
+    TestInitListSupport<tbb::concurrent_vector<int> >({});
 }
 #endif //if __TBB_INITIALIZER_LISTS_PRESENT
 
diff --git a/src/test/test_dynamic_link.cpp b/src/test/test_dynamic_link.cpp
new file mode 100644
index 0000000..687c357
--- /dev/null
+++ b/src/test/test_dynamic_link.cpp
@@ -0,0 +1,90 @@
+/*
+    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+enum FOO_TYPE {
+    FOO_DUMMY,          // result of the dummy_foo* fallbacks
+    FOO_IMPLEMENTATION  // result of the exported foo* functions
+};
+
+#if _WIN32 || _WIN64
+#define TEST_EXPORT
+#else
+#define TEST_EXPORT extern "C"
+#endif /* _WIN32 || _WIN64 */
+
+// foo "implementations": declared through TEST_EXPORT (extern "C" outside Windows) and listed by name in LinkTable.
+TEST_EXPORT FOO_TYPE foo1() { return FOO_IMPLEMENTATION; }
+TEST_EXPORT FOO_TYPE foo2() { return FOO_IMPLEMENTATION; }
+// foo "dummies": the initial targets of the handlers below.
+FOO_TYPE dummy_foo1() { return FOO_DUMMY; }
+FOO_TYPE dummy_foo2() { return FOO_DUMMY; }
+
+// Handlers: function pointers that TestMain expects dynamic_link either to redirect to foo1/foo2 or to leave at the dummies.
+static FOO_TYPE (*foo1_handler)() = &dummy_foo1;
+static FOO_TYPE (*foo2_handler)() = &dummy_foo2;
+
+#include "tbb/tbb_config.h"
+// Suppress the weak symbol mechanism to avoid surplus compiler warnings.
+#ifdef __TBB_WEAK_SYMBOLS_PRESENT
+#undef __TBB_WEAK_SYMBOLS_PRESENT
+#endif
+// Use of harness assert to avoid the dependency on TBB
+#include "harness_assert.h"
+#define LIBRARY_ASSERT(p,message) ASSERT(p,message)
+#include "tbb/dynamic_link.h"
+// Table describing how to link the handlers: each entry pairs a symbol name with the address of the handler pointer for it.
+static const tbb::internal::dynamic_link_descriptor LinkTable[] = {
+    { "foo1", (tbb::internal::pointer_to_handler*)(void*)(&foo1_handler) },
+    { "foo2", (tbb::internal::pointer_to_handler*)(void*)(&foo2_handler) }
+};
+
+// The direct include since we want to test internal functionality.
+#include "tbb/dynamic_link.cpp"
+#include "harness.h"
+#include "harness_dynamic_libs.h"
+
+int TestMain () {
+#if !_WIN32
+    // Sanity check: both foo symbols must be resolvable through OpenLibrary(NULL).
+    ASSERT( Harness::GetAddress( Harness::OpenLibrary(NULL), "foo1" ) && Harness::GetAddress( Harness::OpenLibrary(NULL), "foo2" ),
+            "The executable doesn't export its symbols. Is the -rdynamic switch set during linking?" );
+#endif /* !_WIN32 */
+    // The symbols to link (or to fail linking) live in the executable itself,
+    // so the library name given to dynamic_link does not matter and an empty
+    // string is used. The test mostly makes sense on Linux; on Windows it
+    // checks that dynamic_link copes gracefully with an incorrect library name.
+    const bool linked = tbb::internal::dynamic_link( "", LinkTable, sizeof(LinkTable)/sizeof(LinkTable[0]) );
+    if ( !linked ) {
+        ASSERT( foo1_handler==dummy_foo1 && foo2_handler==dummy_foo2, "The symbols are corrupted by dynamic_link" );
+    } else {
+        ASSERT( foo1_handler && foo2_handler, "The symbols are corrupted by dynamic_link" );
+        ASSERT( foo1_handler() == FOO_IMPLEMENTATION && foo2_handler() == FOO_IMPLEMENTATION,
+                "dynamic_link returned the successful code but symbol(s) are wrong" );
+    }
+    return Harness::Done;
+}
diff --git a/src/test/test_eh_algorithms.cpp b/src/test/test_eh_algorithms.cpp
index d4b5a13..a2ed9db 100644
--- a/src/test/test_eh_algorithms.cpp
+++ b/src/test/test_eh_algorithms.cpp
@@ -499,6 +499,61 @@ void TestCancelation3 () {
     ASSERT ( result == expected, "Wrong calculation result");
 }
 
+struct StatsCounters {
+    tbb::atomic<size_t> my_total_created; // bumped by every ParReduceBody constructor
+    tbb::atomic<size_t> my_total_deleted; // bumped by the ParReduceBody destructor
+    StatsCounters() {
+        my_total_created = 0;
+        my_total_deleted = 0;
+    }
+};
+
+class ParReduceBody { // parallel_reduce body that only counts its own instances and aborts the run once enough of them exist
+    StatsCounters* my_stats; // shared creation/destruction counters
+    size_t my_id;            // value of my_total_created at the moment this body was constructed
+    bool my_exception;       // true: abort by throwing; false: abort by cancelling the task group
+
+public:
+    ParReduceBody( StatsCounters& s_, bool e_ ) : my_stats(&s_), my_exception(e_) {
+        my_id = my_stats->my_total_created++;
+    }
+
+    ParReduceBody( const ParReduceBody& lhs ) : my_stats(lhs.my_stats), my_exception(lhs.my_exception) {
+        // my_exception has to be carried over: operator() reads it in copies as well
+        my_id = my_stats->my_total_created++;
+    }
+
+    ParReduceBody( ParReduceBody& lhs, tbb::split ) : my_stats(lhs.my_stats), my_exception(lhs.my_exception) {
+        // likewise for split-off bodies, which previously kept an indeterminate my_exception
+        my_id = my_stats->my_total_created++;
+    }
+
+    ~ParReduceBody(){ ++my_stats->my_total_deleted; }
+
+    void operator()( const tbb::blocked_range<std::size_t>& /*range*/ ) const {
+        //Do nothing in the first bodies; every body with id 12 or above (threshold chosen arbitrarily) aborts the algorithm
+        if( my_id >= 12 ) {
+            if( my_exception )
+                ThrowTestException(1);
+            else
+                tbb::task::self().cancel_group_execution();
+        }
+    }
+
+    void join( ParReduceBody& /*rhs*/ ) {} // nothing to merge, the bodies carry no result
+};
+
+void TestCancelation4() { // checks that every parallel_reduce body gets destroyed when a run is cancelled or throws
+    StatsCounters statsObj;
+    __TBB_TRY {
+        tbb::task_group_context tgc1, tgc2; // a separate context per run
+        ParReduceBody body_for_cancellation(statsObj, false), body_for_exception(statsObj, true);
+        tbb::parallel_reduce( tbb::blocked_range<std::size_t>(0,100000000,100), body_for_cancellation, tbb::simple_partitioner(), tgc1 );
+        tbb::parallel_reduce( tbb::blocked_range<std::size_t>(0,100000000,100), body_for_exception, tbb::simple_partitioner(), tgc2 );
+    } __TBB_CATCH(...) {} // anything thrown out of the runs is swallowed here; only the counters matter
+    ASSERT ( statsObj.my_total_created==statsObj.my_total_deleted, "Not all parallel_reduce body objects created were reclaimed");
+}
+
 void RunParForAndReduceTests () {
     REMARK( "parallel for and reduce tests\n" );
     tbb::task_scheduler_init init (g_NumThreads);
@@ -514,6 +569,7 @@ void RunParForAndReduceTests () {
     TestCancelation1();
     TestCancelation2();
     TestCancelation3();
+    TestCancelation4();
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/test/test_initializer_list.h b/src/test/test_initializer_list.h
index 9b281fa..2f312c2 100644
--- a/src/test/test_initializer_list.h
+++ b/src/test/test_initializer_list.h
@@ -30,42 +30,134 @@
 #define __TBB_test_initializer_list_H
 #include "tbb/tbb_config.h"
 
-
 #if __TBB_INITIALIZER_LISTS_PRESENT
 #include <initializer_list>
-//TODO: split into set of tests
-//TODO: add test for no leaks, and correct element lifetime
-//the need for macro comes from desire to test different scenarios where initializer sequence is compile time constant
-#define __TBB_TEST_INIT_LIST_SUITE(FUNC_NAME, CONTAINER, ELEMENT_TYPE, INIT_SEQ)                                                                  \
-void FUNC_NAME(){                                                                                                                                 \
-    typedef ELEMENT_TYPE element_type;                                                                                                            \
-    typedef CONTAINER<element_type> container_type;                                                                                               \
-    element_type test_seq[] = INIT_SEQ;                                                                                                           \
-    container_type expected(test_seq,test_seq + array_length(test_seq));                                                                          \
-                                                                                                                                                  \
-    /*test for explicit contructor call*/                                                                                                         \
-    container_type vd (INIT_SEQ,tbb::cache_aligned_allocator<int>());                                                                             \
-    ASSERT(vd == expected,"initialization via explicit constructor call with init list failed");                                                  \
-    /*test for explicit contructor call with std::initializer_list*/                                                                              \
-                                                                                                                                                  \
-    std::initializer_list<element_type> init_list = INIT_SEQ;                                                                                     \
-    container_type v1 (init_list,tbb::cache_aligned_allocator<int>());                                                                            \
-    ASSERT(v1 == expected,"initialization via explicit constructor call with std::initializer_list failed");                                      \
-                                                                                                                                                  \
-    /*implicit constructor call test*/                                                                                                            \
-    container_type v = INIT_SEQ;                                                                                                                  \
-    ASSERT(v == expected,"init list constructor failed");                                                                                         \
-                                                                                                                                                  \
-    /*assignment operator test*/                                                                                                                  \
-    /*TODO: count created and destroyed injects to assert that no extra copy of vector was created implicitly*/                                   \
-    container_type va;                                                                                                                            \
-    va = INIT_SEQ;                                                                                                                                \
-    ASSERT(va == expected,"init list operator= failed");                                                                                          \
-                                                                                                                                                  \
-    container_type vae;                                                                                                                           \
-    vae.assign(INIT_SEQ);                                                                                                                         \
-    ASSERT(vae == expected,"init list assign failed");                                                                                            \
-}                                                                                                                                                 \
+#include <vector>
+#include "harness_defs.h" //for int_to_type
+
+namespace initializer_list_support_tests{
+    template<typename container_type, typename element_type> // check: constructing from an initializer_list yields `expected`
+    void test_constructor(std::initializer_list<element_type> const& il, container_type const& expected){
+        container_type vd (il); // direct-initialization from the list
+        ASSERT(vd == expected,"initialization via explicit constructor call with init list failed");
+    }
+
+
+    template<typename container_type, typename element_type> // check: operator=(initializer_list) yields `expected`
+    void test_assignment_operator(std::initializer_list<element_type> const& il, container_type const& expected){
+        container_type va; // default-constructed first, so that the assignment itself is what gets tested
+        va = il;
+        ASSERT(va == expected,"init list operator= failed");
+    }
+
+    template<typename container_type, typename element_type> // check: assign(initializer_list) yields `expected`
+    void test_assign(Harness::int_to_type<true>, std::initializer_list<element_type> const& il, container_type const& expected){
+        container_type vae;
+        vae.assign(il); // the initializer_list overload, not assign(begin,end) - hence the corrected message below
+        ASSERT(vae == expected,"init list assign failed");
+    }
+    template<typename container_type, typename element_type> // overload selected by int_to_type<false>
+    void test_assign(Harness::int_to_type<false>, std::initializer_list<element_type> const& , container_type const&){
+        //skip the test as container has no assign method
+    }
+
+    template <typename container_type, typename do_test_assign_t> // runs every initializer_list check for container_type
+    void TestInitListSupport(std::initializer_list<typename container_type::value_type> const& il, do_test_assign_t do_test_assign_p){
+        typedef typename container_type::value_type element_type;
+        std::vector<element_type> test_seq = il; // plain copy of the elements
+        container_type expected(test_seq.begin(), test_seq.end()); // built through the iterator-range constructor; the baseline for all checks
+
+        test_constructor<container_type,element_type>(il, expected);
+        test_assignment_operator<container_type,element_type>(il, expected);
+        test_assign<container_type,element_type>(do_test_assign_p, il, expected); // the tag type picks the real or the no-op overload
+    }
+
+    template <typename container_type> // full suite, including the assign() check
+    void TestInitListSupport(std::initializer_list<typename container_type::value_type> const& il ){
+        TestInitListSupport<container_type>(il, Harness::int_to_type<true>());
+    }
+
+    template <typename container_type> // same suite, with the assign() check replaced by the no-op overload
+    void TestInitListSupportWithoutAssign(std::initializer_list<typename container_type::value_type> const& il ){
+        TestInitListSupport<container_type>(il, Harness::int_to_type<false>());
+    }
+
+
+    //TODO: add test for no leaks, and correct element lifetime
+    //A macro (not a function template) is needed here: the scenarios must see the initializer sequence as a braced compile-time literal
+    #define __TBB_TEST_INIT_LIST_SUITE_SINGLE(FUNC_NAME, CONTAINER, ELEMENT_TYPE, INIT_SEQ)                                                           \
+    void FUNC_NAME(){                                                                                                                                 \
+        typedef ELEMENT_TYPE element_type;                                                                                                            \
+        typedef CONTAINER<element_type> container_type;                                                                                               \
+        element_type test_seq[] = INIT_SEQ;                                                                                                           \
+        container_type expected(test_seq,test_seq + Harness::array_length(test_seq));                                                                          \
+                                                                                                                                                      \
+        /*test for explicit constructor call*/                                                                                                        \
+        container_type vd (INIT_SEQ);                                                                                                                 \
+        ASSERT(vd == expected,"initialization via explicit constructor call with init list failed");                                                  \
+        /*test for explicit constructor call with std::initializer_list*/                                                                             \
+                                                                                                                                                      \
+        std::initializer_list<element_type> init_list = INIT_SEQ;                                                                                     \
+        container_type v1 (init_list);                                                                                                                \
+        ASSERT(v1 == expected,"initialization via explicit constructor call with std::initializer_list failed");                                      \
+                                                                                                                                                      \
+        /*implicit constructor call test*/                                                                                                            \
+        container_type v = INIT_SEQ;                                                                                                                  \
+        ASSERT(v == expected,"init list constructor failed");                                                                                         \
+                                                                                                                                                      \
+        /*assignment operator test*/                                                                                                                  \
+        /*TODO: count created and destroyed injects to assert that no extra copy of vector was created implicitly*/                                   \
+        container_type va;                                                                                                                            \
+        va = INIT_SEQ;                                                                                                                                \
+        ASSERT(va == expected,"init list operator= failed");                                                                                          \
+        /*assign(initializer list) test*/                                                                                                             \
+        container_type vae;                                                                                                                           \
+        vae.assign(INIT_SEQ);                                                                                                                         \
+        ASSERT(vae == expected,"init list assign failed");                                                                                            \
+    }                                                                                                                                                 \
+
+    namespace initializer_list_helpers{
+        template<typename T>
+        class ad_hoc_container{ // minimal std::vector-backed container providing each operation the suite macro above uses
+            std::vector<T> vec;
+            public:
+            ad_hoc_container(){}
+            template<typename InputIterator>
+            ad_hoc_container(InputIterator begin, InputIterator end) : vec(begin,end) {}
+            ad_hoc_container(std::initializer_list<T> const& il) : vec(il.begin(),il.end()) {} // non-explicit, so `container = {…}` works
+            ad_hoc_container(ad_hoc_container const& other) : vec(other.vec) {}
+            ad_hoc_container& operator=(ad_hoc_container const& rhs){ vec=rhs.vec; return *this;}
+            ad_hoc_container& operator=(std::initializer_list<T> const& il){ vec.assign(il.begin(),il.end()); return *this;}
+            template<typename InputIterator>
+            void assign(InputIterator begin, InputIterator end){ vec.assign(begin,end);}
+            void assign(std::initializer_list<T> const& il){ vec.assign(il.begin(),il.end());}
+            friend bool operator==(ad_hoc_container<T> const& lhs, ad_hoc_container<T> const& rhs){ return lhs.vec==rhs.vec;} // element-wise, via std::vector
+        };
+    }
+
+    #define AD_HOC_INIT_SEQ {1,2,3,4}
+    __TBB_TEST_INIT_LIST_SUITE_SINGLE(TestCompilerSupportInt, initializer_list_helpers::ad_hoc_container, int, AD_HOC_INIT_SEQ )
+    #undef AD_HOC_INIT_SEQ
+    #define AD_HOC_PAIR_INIT_SEQ {{1,1}, {2,2},{3,3}, {4,4}}
+    #define AD_HOC_INIT_SEQ_PAIR_TYPE std::pair<int,int>
+    __TBB_TEST_INIT_LIST_SUITE_SINGLE(TestCompilerSupportIntPair, initializer_list_helpers::ad_hoc_container, AD_HOC_INIT_SEQ_PAIR_TYPE, AD_HOC_PAIR_INIT_SEQ )
+    #undef AD_HOC_INIT_SEQ_PAIR_TYPE
+    #undef AD_HOC_PAIR_INIT_SEQ
+
+
+    bool TestCompilerForInitializerList(); // forward declaration: needed by the initializer just below
+    namespace  {
+        const bool compiler_init_list_tests_are_run =  TestCompilerForInitializerList(); // its initializer is what actually runs the checks
+    }
+
+    //TODO: move this to test_compiler
+    bool TestCompilerForInitializerList(){ // runs both ad_hoc_container suites; always returns true
+        TestCompilerSupportInt();
+        TestCompilerSupportIntPair();
+        tbb::internal::suppress_unused_warning(compiler_init_list_tests_are_run);
+        return true;
+    }
+} // namespace initializer_list_support_tests
 
 #endif //__TBB_INITIALIZER_LISTS_PRESENT
 #endif //__TBB_test_initializer_list_H
diff --git a/src/test/test_malloc_compliance.cpp b/src/test/test_malloc_compliance.cpp
index 3f2366c..66a0068 100644
--- a/src/test/test_malloc_compliance.cpp
+++ b/src/test/test_malloc_compliance.cpp
@@ -114,7 +114,9 @@ void limitMem( size_t limit )
 #define HARNESS_CUSTOM_MAIN 1
 #include "harness.h"
 #include "harness_barrier.h"
+#if !__TBB_SOURCE_DIRECTLY_INCLUDED
 #include "harness_tbb_independence.h"
+#endif
 #if __linux__
 #include <stdint.h> // uintptr_t
 #endif
diff --git a/src/test/test_malloc_init_shutdown.cpp b/src/test/test_malloc_init_shutdown.cpp
index b52fc5e..35b3d14 100644
--- a/src/test/test_malloc_init_shutdown.cpp
+++ b/src/test/test_malloc_init_shutdown.cpp
@@ -32,7 +32,9 @@
 
 #include "harness.h"
 #include "harness_barrier.h"
+#if !__TBB_SOURCE_DIRECTLY_INCLUDED
 #include "harness_tbb_independence.h"
+#endif
 
 tbb::atomic<int> FinishedTasks;
 const int MaxTasks = 16;
diff --git a/src/test/test_malloc_pools.cpp b/src/test/test_malloc_pools.cpp
index 341fb62..08d59af 100644
--- a/src/test/test_malloc_pools.cpp
+++ b/src/test/test_malloc_pools.cpp
@@ -30,7 +30,9 @@
 #include "tbb/atomic.h"
 #include "harness.h"
 #include "harness_barrier.h"
+#if !__TBB_SOURCE_DIRECTLY_INCLUDED
 #include "harness_tbb_independence.h"
+#endif
 
 template<typename T>
 static inline T alignUp  (T arg, uintptr_t alignment) {
@@ -478,6 +480,28 @@ static void TestEntries()
     pool_destroy(pool);
 }
 
+static void TestPoolCreation()
+{
+    using namespace rml;
+    // reset both callback counters so that the checks below start from zero
+    putMemCalls = getMemCalls = 0;
+    MemPoolPolicy noAllocPolicy(NULL, putMemPolicy);
+    MemPoolPolicy noFreePolicy(getMemPolicy, NULL);
+    MemPoolPolicy validPolicy(getMemPolicy, putMemPolicy);
+    MemoryPool *pool;
+    // a policy lacking either callback has to be rejected, and neither callback may have been counted
+    MemPoolError status = pool_create_v1(0, &noAllocPolicy, &pool);
+    ASSERT(status==INVALID_POLICY, "pool with empty pAlloc can't be created");
+    status = pool_create_v1(0, &noFreePolicy, &pool);
+    ASSERT(status==INVALID_POLICY, "pool with empty pFree can't be created");
+    ASSERT(!putMemCalls && !getMemCalls, "no callback calls are expected");
+    // a complete policy has to produce a pool; after destroying it the two counters must match
+    status = pool_create_v1(0, &validPolicy, &pool);
+    ASSERT(status==POOL_OK, NULL);
+    pool_destroy(pool);
+    ASSERT(putMemCalls == getMemCalls, "no leaks after pool_destroy");
+}
+
 int TestMain () {
     TestTooSmallBuffer();
     TestPoolReset();
@@ -487,6 +511,7 @@ int TestMain () {
     TestPoolGranularity();
     TestPoolKeepTillDestroy();
     TestEntries();
+    TestPoolCreation();
 
     return Harness::Done;
 }
diff --git a/src/test/test_malloc_pure_c.c b/src/test/test_malloc_pure_c.c
index 4ef6bb2..0a7957c 100644
--- a/src/test/test_malloc_pure_c.c
+++ b/src/test/test_malloc_pure_c.c
@@ -41,21 +41,23 @@
 
 #if __linux__
 /* huge pages supported only under Linux so far */
-void CheckReturnCode(int ret) { assert(!ret); }
+const int ExpectedResultHugePages = TBBMALLOC_OK;
 #else
-void CheckReturnCode(int ret) { assert( ret); }
+const int ExpectedResultHugePages = TBBMALLOC_NO_EFFECT;
 #endif
 
 int main(void) {
     size_t i, j;
-    int curr_mode;
+    int curr_mode, res;
     void *p1, *p2;
 
     for ( curr_mode = 0; curr_mode<=1; curr_mode++) {
-        CheckReturnCode(scalable_allocation_mode(USE_HUGE_PAGES, !curr_mode));
+        assert(ExpectedResultHugePages ==
+               scalable_allocation_mode(TBBMALLOC_USE_HUGE_PAGES, !curr_mode));
         p1 = scalable_malloc(10*1024*1024);
         assert(p1);
-        CheckReturnCode(scalable_allocation_mode(USE_HUGE_PAGES, curr_mode));
+        assert(ExpectedResultHugePages ==
+               scalable_allocation_mode(TBBMALLOC_USE_HUGE_PAGES, curr_mode));
         scalable_free(p1);
     }
     /* note that huge pages (if supported) are still enabled at this point */
@@ -93,6 +95,14 @@ int main(void) {
     }
     scalable_free(p1);
     scalable_free(p2);
+    res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
+    assert(res == TBBMALLOC_OK);
+    res = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS, NULL);
+    /* expect all caches cleaned before, so got nothing from CLEAN_THREAD_BUFFERS */
+    assert(res == TBBMALLOC_NO_EFFECT);
+    /* check that invalid param argument give expected result*/
+    res = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS, (void*)1);
+    assert(res == TBBMALLOC_INVALID_PARAM);
     printf("done\n");
     return 0;
 }
diff --git a/src/test/test_malloc_whitebox.cpp b/src/test/test_malloc_whitebox.cpp
index a642b74..7bf2061 100644
--- a/src/test/test_malloc_whitebox.cpp
+++ b/src/test/test_malloc_whitebox.cpp
@@ -216,20 +216,24 @@ public:
     }
 };
 
-class FreeBlockPoolHit: NoAssign {
-    // to trigger possible leak for both cleanup on pool overflow 
-    // and on thread termination
-    static const int ITERS = 2*FreeBlockPool::POOL_HIGH_MARK;
+class LocalCachesHit: NoAssign {
+    // set ITERS to trigger possible leak of backreferences
+    // during cleanup on cache overflow and on thread termination
+    static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK +
+                                LocalLOC::LOC_HIGH_MARK);
 public:
-    FreeBlockPoolHit() {}
+    LocalCachesHit() {}
     void operator()(int) const {
-        void *objs[ITERS];
-
-        for (int i=0; i<ITERS; i++)
-            objs[i] = scalable_malloc(minLargeObjectSize-1);
-        for (int i=0; i<ITERS; i++)
-            scalable_free(objs[i]);
+        void *objsSmall[ITERS], *objsLarge[ITERS];
 
+        for (int i=0; i<ITERS; i++) {
+            objsSmall[i] = scalable_malloc(minLargeObjectSize-1);
+            objsLarge[i] = scalable_malloc(minLargeObjectSize);
+        }
+        for (int i=0; i<ITERS; i++) {
+            scalable_free(objsSmall[i]);
+            scalable_free(objsLarge[i]);
+        }
 #ifdef USE_WINTHREAD
         // Under Windows DllMain is used for mallocThreadShutdownNotification
         // calling. As DllMain is not used during whitebox testing,
@@ -247,11 +251,6 @@ static size_t allocatedBackRefCount()
     return cnt;
 }
 
-static void cleanObjectCache()
-{
-    defaultMemPool->extMemPool.hardCachesCleanup();
-}
-
 class TestInvalidBackrefs: public SimpleBarrier {
 #if __ANDROID__
     // Android requires lower iters due to lack of virtual memory.
@@ -313,14 +312,16 @@ void TestBackRef() {
     int sustLastUsed = backRefMaster->lastUsed;
     NativeParallelFor( 1, BackRefWork() );
     ASSERT(sustLastUsed == backRefMaster->lastUsed, "backreference leak detected");
-    
-    // check leak of back references while per-thread small object pool is in use
-    // warm up need to cover bootStrapMalloc call
-    NativeParallelFor( 1, FreeBlockPoolHit() );
+
+    // check leak of back references while per-thread caches are in use
+    // warm up needed to cover bootStrapMalloc call
+    NativeParallelFor( 1, LocalCachesHit() );
     beforeNumBackRef = allocatedBackRefCount();
-    NativeParallelFor( 1, FreeBlockPoolHit() );
+    NativeParallelFor( 2, LocalCachesHit() );
+    int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
+    ASSERT(res == TBBMALLOC_OK, NULL);
     afterNumBackRef = allocatedBackRefCount();
-    ASSERT(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
+    ASSERT(beforeNumBackRef>=afterNumBackRef, "backreference leak detected");
 
     // This is a regression test against race condition between backreference
     // extension and checking invalid BackRefIdx.
@@ -377,6 +378,21 @@ int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes)
     return 0;
 }
 
+class StressLOCacheWork: NoAssign {
+    rml::MemoryPool *my_pool; // the pool every allocation below is made from
+public:
+    StressLOCacheWork(rml::MemoryPool *mallocPool) : my_pool(mallocPool) {}
+    void operator()(int) const { // allocate, fill and free one block per size step below 1MB
+        const size_t upperBound = 1*1024*1024;
+        for (size_t sz=minLargeObjectSize; sz<upperBound; sz+=LargeObjectCache::largeBlockCacheStep) {
+            void *block = pool_malloc(my_pool, sz);
+            ASSERT(block, "Memory was not allocated");
+            memset(block, sz, sz);
+            pool_free(my_pool, block);
+        }
+    }
+};
+
 void TestPools() {
     rml::MemPoolPolicy pol(getMem, putMem);
     size_t beforeNumBackRef, afterNumBackRef;
@@ -388,7 +404,7 @@ void TestPools() {
     pool_destroy(pool1);
     pool_destroy(pool2);
 
-    cleanObjectCache();
+    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
     beforeNumBackRef = allocatedBackRefCount();
     rml::MemoryPool *fixedPool;
 
@@ -440,16 +456,12 @@ void TestPools() {
     pool_free(fixedPool, largeObj);
 
     // provoke large object cache cleanup and hope no leaks occurs
-    for (size_t sz=minLargeObjectSize; sz<1*1024*1024; sz+=LargeObjectCache::largeBlockCacheStep) {
-        ptr = pool_malloc(mallocPool, sz);
-        ASSERT(ptr, "Memory was not allocated");
-        memset(ptr, sz, sz);
-        pool_free(mallocPool, ptr);
-    }
+    for( int p=MaxThread; p>=MinThread; --p )
+        NativeParallelFor( p, StressLOCacheWork(mallocPool) );
     pool_destroy(mallocPool);
     pool_destroy(fixedPool);
 
-    cleanObjectCache();
+    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
     afterNumBackRef = allocatedBackRefCount();
     ASSERT(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
 
@@ -468,7 +480,7 @@ void TestPools() {
         ASSERT(loc->getUsedSize(), NULL);
         pool_free(mallocPool, p[3]);
         ASSERT(loc->getLOCSize() < 3*(minLargeObjectSize+LargeObjectCache::largeBlockCacheStep), NULL);
-        const size_t maxLocalLOCSize = LocalLOC<3,30>::getMaxSize();
+        const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize();
         ASSERT(loc->getUsedSize() <= maxLocalLOCSize, NULL);
         for (int i=0; i<3; i++)
             p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeObjectCache::largeBlockCacheStep);
@@ -485,7 +497,7 @@ void TestPools() {
     // To test LOC we need bigger lists than released by current LocalLOC
     //   in production code. Create special LocalLOC.
     {
-        LocalLOC<2, 20> lLOC;
+        LocalLOCImpl<2, 20> lLOC;
         pool_create_v1(0, &pol, &mallocPool);
         rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool;
         const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
@@ -498,7 +510,7 @@ void TestPools() {
             ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
             ASSERT(ret, NULL);
         }
-        lLOC.clean(mPool);
+        lLOC.externalCleanup(mPool);
         ASSERT(!loc->getUsedSize(), NULL);
 
         pool_destroy(mallocPool);
diff --git a/src/test/test_mutex.cpp b/src/test/test_mutex.cpp
index d35d9fe..6820c35 100644
--- a/src/test/test_mutex.cpp
+++ b/src/test/test_mutex.cpp
@@ -178,7 +178,7 @@ namespace tbb {
 /** Does not test features specific to reader-writer locks. */
 template<typename M>
 void Test( const char * name ) {
-    REMARK("%s time = ",name);
+    REMARK("%s size == %d, time = ",name, sizeof(M));
     Counter<M> counter;
     counter.value = 0;
     tbb::profiling::set_name(counter.mutex, name);
@@ -499,7 +499,7 @@ void TestNullMutex( const char * name ) {
     Counter<M> counter;
     counter.value = 0;
     const int n = 100;
-    REMARK("%s ",name);
+    REMARK("TestNullMutex<%s>",name);
     {
         tbb::parallel_for(tbb::blocked_range<size_t>(0,n,10),AddOne<Counter<M> >(counter));
     }
@@ -507,15 +507,16 @@ void TestNullMutex( const char * name ) {
     {
         tbb::parallel_for(tbb::blocked_range<size_t>(0,n,10),NullRecursive<Counter<M> >(counter));
     }
-
+    REMARK("\n");
 }
 
 template<typename M>
 void TestNullRWMutex( const char * name ) {
-    REMARK("%s ",name);
+    REMARK("TestNullRWMutex<%s>",name);
     const int n = 100;
     M m;
     tbb::parallel_for(tbb::blocked_range<size_t>(0,n,10),NullUpgradeDowngrade<M>(m, name));
+    REMARK("\n");
 }
 
 //! Test ISO C++0x compatibility portion of TBB mutex
@@ -547,8 +548,54 @@ void TestRecursiveMutexISO( const char * name ) {
     TestRecursiveMutex<tbb_from_iso>(name);
 }
 
+#include "harness_tsx.h"
 #include "tbb/task_scheduler_init.h"
 
+#if HAVE_TSX && __INTEL_COMPILER
+
+//! Function object for use with parallel_for.h to see if a transaction is actually attempted.
+tbb::atomic<size_t> n_transactions_attempted;
+template<typename C>
+struct AddOne_CheckTransaction: NoAssign {
+    C& counter; // shared counter; must expose .mutex, .value and a nested mutex_type
+    /** Increments counter once for each iteration in the iteration space. */
+    void operator()( tbb::blocked_range<size_t>& range ) const {
+        for( size_t i=range.begin(); i!=range.end(); ++i ) {
+            bool transaction_attempted = false;
+            { // inner scope bounds the lifetime of the scoped_lock
+              typename C::mutex_type::scoped_lock lock(counter.mutex);
+              if( IsInsideTx() ) transaction_attempted = true; // sampled while the lock object is alive; IsInsideTx is defined outside this view
+              counter.value = counter.value+1;
+            }
+            if( transaction_attempted ) ++n_transactions_attempted; // global tally is bumped only after the lock scope has closed
+            __TBB_Pause(i);
+        }
+    }
+    AddOne_CheckTransaction( C& counter_ ) : counter(counter_) {}
+};
+
+template<typename M>
+void TestTransaction( const char * name ) // reports an error if no run of the counter workload is observed inside a transaction
+{
+    Counter<M> counter;
+    const int n = 100; // iterations per parallel_for run, and the expected final counter value
+    REMARK("TestTransaction with %s\n",name);
+
+    n_transactions_attempted = 0;
+    for( int i=0; i<5 && n_transactions_attempted==0; ++i ) { // at most 5 runs; stops once any attempt was counted
+        counter.value = 0;
+        tbb::parallel_for(tbb::blocked_range<size_t>(0,n,2),AddOne_CheckTransaction<Counter<M> >(counter));
+        if( counter.value!=n ) { // each iteration adds exactly 1 under the lock, so any other total is reported
+            REPORT("ERROR for %s: counter.value=%ld\n",name,counter.value);
+            break;
+        }
+    }
+
+    if( n_transactions_attempted==0 )
+        REPORT( "ERROR: HLE transactions are never attempted\n" );
+}
+#endif
+
 int TestMain () {
     for( int p=MinThread; p<=MaxThread; ++p ) {
         tbb::task_scheduler_init init( p );
@@ -566,6 +613,7 @@ int TestMain () {
             TestNullMutex<tbb::null_rw_mutex>( "Null RW Mutex" );
             TestNullRWMutex<tbb::null_rw_mutex>( "Null RW Mutex" );
             Test<tbb::spin_mutex>( "Spin Mutex" );
+            Test<tbb::speculative_spin_mutex>( "Spin Mutex/speculative" );
 #if _OPENMP
             Test<OpenMP_Mutex>( "OpenMP_Mutex" );
 #endif /* _OPENMP */
@@ -576,6 +624,7 @@ int TestMain () {
             Test<tbb::spin_rw_mutex>( "Spin RW Mutex" );
 
             TestTryAcquire_OneThread<tbb::spin_mutex>("Spin Mutex");
+            TestTryAcquire_OneThread<tbb::speculative_spin_mutex>("Spin Mutex/speculative");
             TestTryAcquire_OneThread<tbb::queuing_mutex>("Queuing Mutex");
 #if USE_PTHREAD
             // under ifdef because on Windows tbb::mutex is reenterable and the test will fail
@@ -611,5 +660,13 @@ int TestMain () {
         }
         REMARK( "calling destructor for task_scheduler_init\n" );
     }
+
+#if HAVE_TSX && __INTEL_COMPILER
+    // additional test for speculative_spin_mutex to see if we actually attempt lock elisions
+    if( have_TSX() ) {
+        tbb::task_scheduler_init init( MaxThread );
+        TestTransaction<tbb::speculative_spin_mutex>( "Spin Mutex/speculative" );
+    }
+#endif
     return Harness::Done;
 }
diff --git a/src/test/test_priority_queue_node.cpp b/src/test/test_priority_queue_node.cpp
index 2efecf8..613dc1d 100644
--- a/src/test/test_priority_queue_node.cpp
+++ b/src/test/test_priority_queue_node.cpp
@@ -335,7 +335,7 @@ int test_serial() {
 
 int TestMain() { 
     tbb::tick_count start = tbb::tick_count::now(), stop;
-    for (int p = 1; p < 4; ++p) {
+    for (int p = 2; p <= 4; ++p) {
         tbb::task_scheduler_init init(p);
         test_serial<int>();
         test_reservation<int>(p);
diff --git a/src/test/test_queue_node.cpp b/src/test/test_queue_node.cpp
index 0f293cb..6b8d744 100644
--- a/src/test/test_queue_node.cpp
+++ b/src/test/test_queue_node.cpp
@@ -448,7 +448,7 @@ int test_serial() {
 
 int TestMain() { 
     tbb::tick_count start = tbb::tick_count::now(), stop;
-    for (int p = 1; p < 4; ++p) {
+    for (int p = 2; p <= 4; ++p) {
         tbb::task_scheduler_init init(p);
         test_serial<int>();
         test_parallel<int>(p);
diff --git a/src/test/test_sequencer_node.cpp b/src/test/test_sequencer_node.cpp
index c4f0980..5bdccd6 100644
--- a/src/test/test_sequencer_node.cpp
+++ b/src/test/test_sequencer_node.cpp
@@ -393,7 +393,7 @@ int test_serial() {
 
 int TestMain() { 
     tbb::tick_count start = tbb::tick_count::now(), stop;
-    for (int p = 1; p < 4; ++p) {
+    for (int p = 2; p <= 4; ++p) {
         tbb::task_scheduler_init init(p);
         test_serial<int>();
         test_parallel<int>(p);
diff --git a/src/test/test_task_group.cpp b/src/test/test_task_group.cpp
index 4127dce..1498f88 100644
--- a/src/test/test_task_group.cpp
+++ b/src/test/test_task_group.cpp
@@ -842,11 +842,12 @@ int TestMain () {
         TestEh2();
         TestStructuredWait();
         TestStructuredCancellation2<true>();
-        //this condition can not be moved harness_defs.h as the only way to detect std C++ library is to include something from it.
         //TODO: recheck the condition with newer versions of clang/libc++
-#if (__clang__ && _LIBCPP_VERSION && __GXX_EXPERIMENTAL_CXX0X__)
-        //TODO:it seems that clang with libc++ in C++11 mode does not expect exception
+#if (__clang__ && (__cplusplus >= 201103L || _LIBCPP_VERSION ))
+        //TODO:it seems that clang in C++11 mode does not expect exception
         //coming from destructor in the following test as it does not generate correct code for stack unwinding.
+        //TODO:it seems that the libc++ implementation of std::uncaught_exception returns an incorrect value with clang and
+        //icc14 in the following test
         REPORT("Known issue: TestStructuredCancellation2<false> test is skipped.\n");
 #else
         TestStructuredCancellation2<false>();
diff --git a/src/test/test_task_priority.cpp b/src/test/test_task_priority.cpp
index 352fd3b..aa7062b 100644
--- a/src/test/test_task_priority.cpp
+++ b/src/test/test_task_priority.cpp
@@ -224,8 +224,8 @@ public:
         tbb::empty_task &r = *new( tbb::task::allocate_root(ctx) ) tbb::empty_task;
         const int R = 4;
         r.set_ref_count( R * P + 1 );
-        // Only thread 1 changes its task tree priority in preemption test mode
-        uintptr_t opts = m_opts & (id == PreemptionActivatorId ? ~0u : ~(uintptr_t)TestPreemption);
+        // Only PreemptionActivator thread changes its task tree priority in preemption test mode
+        const uintptr_t opts = (id == PreemptionActivatorId) ? m_opts : (m_opts & ~(uintptr_t)TestPreemption);
         for ( int i = 0; i < R; ++i ) {
             for ( int j = 1; j < P; ++j )
                 r.spawn( *new(r.allocate_child()) NodeType(id, MinBaseDepth + id, opts, &r) );
@@ -409,6 +409,7 @@ void TestPriorityAssertions () {
 }
 
 #if __TBB_TASK_PRIORITY
+
 tbb::atomic<tbb::priority_t> g_order;
 tbb::atomic<bool> g_order_established;
 class OrderedTask : public tbb::task {
@@ -452,6 +453,92 @@ void TestEnqueueOrder () {
     while( g_order == tbb::priority_low ) __TBB_Yield();
     while( g_order != tbb::priority_low ) __TBB_Yield();
 }
+
+namespace test_propagation {
+
+// This test creates two binary trees of task_group_context objects.
+// Indices in a binary tree have the following layout:
+//  [1]--> [2] -> [4],[5]
+//     \-> [3] -> [6],[7]
+static const int first = 1, last = 7;
+tbb::task_group_context* g_trees[2][/*last+1*/8];
+tbb::task_group_context* g_default_ctx;
+tbb::atomic<int> g_barrier;
+tbb::atomic<bool> is_finished;
+
+class TestSetPriorityTask : public tbb::task { // builds the g_trees context forest, then parks until is_finished is set
+    const int m_tree, m_i; // tree number (0 or 1) and node index within it; m_i==0 marks the bootstrap task
+public:
+    TestSetPriorityTask(int t, int i) : m_tree(t), m_i(i) {}
+    tbb::task* execute() {
+        if( !m_i ) { // the first task creates two trees
+            g_default_ctx = group(); // remembered so that every task can switch back to it before finishing
+            for( int i = 0; i <= 1; ++i ) {
+                g_trees[i][1] = new tbb::task_group_context( tbb::task_group_context::isolated );
+                tbb::task::spawn(*new(tbb::task::allocate_root(*g_trees[i][1])) TestSetPriorityTask(i, 1));
+            }
+        }
+        else if( m_i <= last/2 ) { // is divisible
+            for( int i = 0; i <= 1; ++i ) {
+                const int index = 2*m_i + i; // children of node m_i live at 2*m_i and 2*m_i+1
+                g_trees[m_tree][index] = new tbb::task_group_context ( tbb::task_group_context::bound );
+                tbb::task::spawn(*new(tbb::task::allocate_root(*g_trees[m_tree][index])) TestSetPriorityTask(m_tree, index));
+            }
+        }
+        --g_barrier; // first decrement: this task has finished creating its contexts and spawning its children
+        //REMARK("Task %i executing\n", m_i);
+        while (!is_finished) __TBB_Yield();
+        change_group(*g_default_ctx); // avoid races with destruction of custom contexts
+        --g_barrier; // second decrement: this task has left its custom context
+        return NULL;
+    }
+};
+
+// Tests task_group_context state propagation (set_priority, then cancel_group_execution) over the two context trees in g_trees.
+void TestSetPriority() {
+    REMARK("Testing set_priority() with existing forest\n");
+    const int workers = last*2+1; // +1 is worker thread executing the first task
+    tbb::task_scheduler_init init(workers+1); // +1 is master thread
+    g_barrier = workers; // one decrement is expected from each of the last*2+1 tasks
+    is_finished = false;
+    tbb::task::spawn(*new(tbb::task::allocate_root()) TestSetPriorityTask(0,0));
+    while(g_barrier) __TBB_Yield(); // wait until every task has created its contexts
+    g_trees[0][2]->set_priority(tbb::priority_high);
+    g_trees[0][4]->set_priority(tbb::priority_normal);
+    g_trees[1][3]->set_priority(tbb::priority_high); // Regression test: it must not set priority_high to g_trees[0][4]
+    //                                         -  1  2  3  4  5  6  7
+    const int expected_priority[2][last+1] = {{0, 0, 1, 0, 0, 1, 0, 0},
+                                              {0, 0, 0, 1, 0, 0, 1, 1}};
+    for (int t = 0; t < 2; ++t)
+        for (int i = first; i <= last; ++i) {
+            REMARK("\r                    \rTask %i... ", i);
+            ASSERT(g_trees[t][i]->priority() == (expected_priority[t][i]? tbb::priority_high : tbb::priority_normal), NULL); // parentheses required: == binds tighter than ?:
+            REMARK("OK");
+        }
+    REMARK("\r                    \r");
+    REMARK("Also testing cancel_group_execution()\n"); // cancellation shares propagation logic with set_priority() but there are also differences
+    g_trees[0][4]->cancel_group_execution();
+    g_trees[0][5]->cancel_group_execution();
+    g_trees[1][3]->cancel_group_execution();
+    //                                             -  1  2  3  4  5  6  7
+    const int expected_cancellation[2][last+1] = {{0, 0, 0, 0, 1, 1, 0, 0},
+                                                  {0, 0, 0, 1, 0, 0, 1, 1}};
+    for (int t = 0; t < 2; ++t)
+        for (int i = first; i <= last; ++i) {
+            REMARK("\r                    \rTask %i... ", i);
+            ASSERT( g_trees[t][i]->is_group_execution_cancelled() == (expected_cancellation[t][i]==1), NULL);
+            REMARK("OK");
+        }
+    REMARK("\r                    \r");
+    g_barrier = workers; // re-armed: each task decrements once more after switching back to the default context
+    is_finished = true;
+    REMARK("waiting tasks to terminate\n");
+    while(g_barrier) __TBB_Yield();
+    for (int t = 0; t < 2; ++t)
+        for (int i = first; i <= last; ++i)
+            delete g_trees[t][i]; // contexts were allocated with new in TestSetPriorityTask::execute
+}
+}//namespace test_propagation
 #endif /* __TBB_TASK_PRIORITY */
 
 #if !__TBB_TEST_SKIP_AFFINITY
@@ -465,6 +552,7 @@ int TestMain () {
 #if !__TBB_TASK_PRIORITY
     REMARK( "Priorities disabled: Running as just yet another task scheduler test\n" );
 #else
+    test_propagation::TestSetPriority(); // TODO: move down when bug 1996 is fixed
     TestEnqueueOrder();
 #endif /* __TBB_TASK_PRIORITY */
     TestPriorityAssertions();
diff --git a/src/test/test_tbb_version.cpp b/src/test/test_tbb_version.cpp
index 619118f..2e233eb 100644
--- a/src/test/test_tbb_version.cpp
+++ b/src/test/test_tbb_version.cpp
@@ -245,8 +245,8 @@ int main(int argc, char *argv[] ) {
 // Fill dictionary with version strings for platforms 
 void initialize_strings_vector(std::vector <string_pair>* vector)
 {
-    vector->push_back(string_pair("TBB: VERSION\t\t4.1", required));          // check TBB_VERSION
-    vector->push_back(string_pair("TBB: INTERFACE VERSION\t6105", required)); // check TBB_INTERFACE_VERSION
+    vector->push_back(string_pair("TBB: VERSION\t\t4.2", required));          // check TBB_VERSION
+    vector->push_back(string_pair("TBB: INTERFACE VERSION\t7000", required)); // check TBB_INTERFACE_VERSION
     vector->push_back(string_pair("TBB: BUILD_DATE", required));
     vector->push_back(string_pair("TBB: BUILD_HOST", required));
     vector->push_back(string_pair("TBB: BUILD_OS", required));

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/tbb.git



More information about the debian-science-commits mailing list