[tbb] 36/64: Imported Upstream version 4.1~20130613

Graham Inggs ginggs at moszumanska.debian.org
Mon Jul 3 12:28:00 UTC 2017


This is an automated email from the git hooks/post-receive script.

ginggs pushed a commit to branch master
in repository tbb.

commit 5143d747bb16794990d906550cc37b1dd061dc02
Author: Graham Inggs <ginggs at debian.org>
Date:   Mon Jul 3 14:13:54 2017 +0200

    Imported Upstream version 4.1~20130613
---
 CHANGES                                            |  27 ++
 Makefile                                           |   2 +-
 README                                             |   2 +-
 build/FreeBSD.gcc.inc                              |   2 +-
 build/Makefile.test                                |  74 ++++--
 build/android.gcc.inc                              |   2 +-
 build/common_rules.inc                             |  19 +-
 build/generate_tbbvars.sh                          |  25 +-
 build/linux.gcc.inc                                |   4 +-
 build/linux.icc.inc                                |   5 +-
 build/macos.icc.inc                                |   2 +-
 build/macos.inc                                    |   2 +-
 build/mic.icc.inc                                  |   2 +-
 build/mic.linux.inc                                |   2 +-
 build/windows.icl.inc                              |   2 +-
 build/windows.inc                                  |   5 +-
 doc/html/a00031.html                               |   4 +
 doc/html/a00169.html                               |   1 +
 doc/html/a00268.html                               |   3 +
 doc/html/a00285.html                               |  17 ++
 doc/html/a00428.html                               |   2 +-
 doc/html/functions_0x61.html                       |   2 +-
 doc/html/functions_0x6f.html                       |   2 +-
 doc/html/functions_func.html                       |   2 +-
 doc/html/functions_func_0x6f.html                  |   2 +-
 .../GettingStarted/sub_string_finder/index.html    |   2 +-
 examples/common/copy_libraries.bat                 |  25 +-
 examples/common/gui/Makefile.gmake                 |   4 +-
 .../concurrent_hash_map/count_strings/index.html   |   2 +-
 .../concurrent_priority_queue/shortpath/index.html |   2 +-
 examples/graph/binpack/index.html                  |   2 +-
 examples/graph/dining_philosophers/index.html      |   2 +-
 examples/graph/logic_sim/index.html                |   2 +-
 examples/index.html                                |  18 +-
 examples/parallel_do/parallel_preorder/index.html  |   2 +-
 examples/parallel_for/game_of_life/index.html      |   2 +-
 examples/parallel_for/polygon_overlay/Makefile     |   6 +-
 examples/parallel_for/polygon_overlay/index.html   |   2 +-
 examples/parallel_for/seismic/Makefile             |   6 +-
 examples/parallel_for/seismic/index.html           |   4 +-
 examples/parallel_for/tachyon/Makefile             |   2 +-
 examples/parallel_for/tachyon/index.html           |   2 +-
 examples/parallel_reduce/convex_hull/index.html    |   2 +-
 examples/parallel_reduce/primes/index.html         |   2 +-
 examples/pipeline/square/index.html                |   2 +-
 examples/task/tree_sum/index.html                  |   2 +-
 examples/task_group/sudoku/index.html              |   2 +-
 examples/task_priority/fractal/Makefile            |   6 +-
 examples/task_priority/fractal/index.html          |   2 +-
 examples/test_all/fibonacci/index.html             |   2 +-
 include/tbb/compat/tuple                           | 294 ++++++++++-----------
 include/tbb/concurrent_hash_map.h                  |   9 +-
 include/tbb/concurrent_priority_queue.h            |  38 ++-
 include/tbb/flow_graph.h                           |  47 ++--
 include/tbb/internal/_concurrent_queue_impl.h      |  10 +-
 include/tbb/internal/_flow_graph_node_impl.h       |   4 +
 include/tbb/machine/linux_ia32.h                   |   2 +-
 include/tbb/machine/macos_common.h                 |   8 +-
 include/tbb/machine/mic_common.h                   |   2 +-
 include/tbb/machine/windows_ia32.h                 |   2 +-
 include/tbb/memory_pool.h                          |   2 +-
 include/tbb/partitioner.h                          |   2 +-
 include/tbb/tbb_config.h                           |   2 +-
 include/tbb/tbb_machine.h                          |  37 ++-
 include/tbb/tbb_stddef.h                           |   4 +-
 src/Makefile                                       |   4 +
 src/index.html                                     |   2 +-
 src/old/concurrent_queue_v2.cpp                    |  13 +-
 src/old/concurrent_vector_v2.cpp                   |   8 +-
 src/old/spin_rw_mutex_v2.cpp                       |   8 +-
 src/rml/server/irml.rc                             |   2 +-
 src/rml/server/rml_server.cpp                      |  18 +-
 src/rml/test/test_thread_monitor.cpp               |   2 +-
 src/tbb/cilk-tbb-interop.h                         |   3 +-
 src/tbb/concurrent_queue.cpp                       |  15 +-
 src/tbb/dynamic_link.h                             |   2 +-
 src/tbb/governor.cpp                               |   2 +-
 src/tbb/index.html                                 |   2 +-
 src/tbb/mac32-tbb-export.lst                       |   2 +-
 src/tbb/mac64-tbb-export.lst                       |   2 +-
 src/tbb/mailbox.h                                  |   3 +-
 src/tbb/queuing_rw_mutex.cpp                       |   4 +-
 src/tbb/scheduler.cpp                              |  45 ++--
 src/tbb/spin_rw_mutex.cpp                          |   6 +-
 src/tbb/tbb_assert_impl.h                          |   3 +-
 src/tbb/tbb_misc.cpp                               |  14 +-
 src/tbb/tbb_resource.rc                            |   2 +-
 src/tbb/tools_api/ittnotify.h                      |   3 +-
 src/tbb/tools_api/ittnotify_config.h               |   2 +-
 src/tbbmalloc/MapMemory.h                          |   2 +-
 src/tbbmalloc/backend.cpp                          |  13 -
 src/tbbmalloc/frontend.cpp                         |  31 ++-
 src/tbbmalloc/tbbmalloc.cpp                        |  12 +-
 src/tbbmalloc/tbbmalloc_internal.h                 | 135 +++++-----
 src/test/harness.h                                 | 105 ++++++--
 src/test/harness_allocator.h                       |   2 +-
 src/test/harness_concurrency.h                     | 113 ++++++++
 src/test/harness_defs.h                            |   8 +-
 src/test/harness_graph.h                           |   4 +-
 src/test/harness_inject_scheduler.h                |  12 +
 src/test/harness_memory.h                          |   2 +-
 src/test/test_atomic.cpp                           |  24 +-
 src/test/test_cilk_dynamic_load.cpp                |   8 +-
 src/test/test_concurrent_hash_map.cpp              |   4 +-
 src/test/test_concurrent_priority_queue.cpp        |  98 ++++++-
 src/test/test_concurrent_unordered.cpp             |   9 +-
 src/test/test_concurrent_vector.cpp                |  35 +--
 src/test/test_fast_random.cpp                      |  20 +-
 src/test/test_hw_concurrency.cpp                   |  59 +----
 src/test/test_initializer_list.h                   |  71 +++++
 src/test/test_intrusive_list.cpp                   |   2 +-
 src/test/test_malloc_compliance.cpp                |   8 +-
 src/test/test_runtime_loader.cpp                   |   2 +-
 src/test/test_task_assertions.cpp                  |  20 +-
 src/test/test_task_enqueue.cpp                     |   2 +-
 src/test/test_task_group.cpp                       |   8 +
 src/test/test_task_leaks.cpp                       |  17 +-
 src/test/test_task_priority.cpp                    |  14 +-
 src/test/test_tbb_header.cpp                       |   6 +
 src/test/test_tbb_version.cpp                      |   2 +-
 120 files changed, 1070 insertions(+), 725 deletions(-)

diff --git a/CHANGES b/CHANGES
index f4aad76..496693a 100644
--- a/CHANGES
+++ b/CHANGES
@@ -3,6 +3,33 @@ The list of most significant changes made over time in
 Intel(R) Threading Bulding Blocks (Intel(R) TBB).
 ------------------------------------------------------------------------
 
+Intel TBB 4.1 Update 4
+TBB_INTERFACE_VERSION == 6105
+
+Changes (w.r.t. Intel TBB 4.1 Update 3):
+
+- Use /volatile:iso option with VS 2012 to disable extended
+    semantics for volatile variables.
+- Various improvements in affinity_partitioner, scheduler,
+    tests, examples, makefiles.
+- Concurrent_priority_queue class now supports initialization/assignment
+    via C++11 initializer list feature (std::initializer_list<T>).
+
+Bugs fixed:
+
+- Fixed more possible stalls in concurrent invocations of
+    task_arena::execute(), especially waiting for enqueued tasks.
+- Fixed requested number of workers for task_arena(P,0).
+- Fixed interoperability with Intel(R) VTune(TM) Amplifier XE in
+    case of using task_arena::enqueue() from a terminating thread.
+
+Open-source contributions integrated:
+
+- Type fixes, cleanups, and code beautification by Raf Schietekat.
+- Improvements in atomic operations for big endian platforms
+    by Raf Schietekat.
+
+------------------------------------------------------------------------
 Intel TBB 4.1 Update 3
 TBB_INTERFACE_VERSION == 6103
 
diff --git a/Makefile b/Makefile
index 8616ffc..38dc03f 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,7 @@ include $(tbb_root)/build/common.inc
 .PHONY: default all tbb tbbmalloc tbbproxy test examples
 
 #workaround for non-depend targets tbb and tbbmalloc which both depend on version_string.ver
-#According to documentation submakes should run in parallel
+#According to documentation, recursively invoked make commands can process their targets in parallel
 .NOTPARALLEL: tbb tbbmalloc tbbproxy
 
 default: tbb tbbmalloc $(if $(use_proxy),tbbproxy)
diff --git a/README b/README
index 67ab8ad..fcc87af 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-Threading Building Blocks - README
+Intel(R) Threading Building Blocks - README
 
 See index.html for directions and documentation.
 
diff --git a/build/FreeBSD.gcc.inc b/build/FreeBSD.gcc.inc
index da461ee..2e280b3 100644
--- a/build/FreeBSD.gcc.inc
+++ b/build/FreeBSD.gcc.inc
@@ -57,7 +57,7 @@ TBB_ASM.OBJ=
 MALLOC_ASM.OBJ=
 
 ifeq (ia64,$(arch))
-# Position-independent code (PIC) is a must on IA-64, even for regular (not shared) executables
+# Position-independent code (PIC) is a must on IA-64 architecture, even for regular (not shared) executables
     CPLUS_FLAGS += $(PIC_KEY)
 endif 
 
diff --git a/build/Makefile.test b/build/Makefile.test
index 4d60149..e9b264c 100644
--- a/build/Makefile.test
+++ b/build/Makefile.test
@@ -42,10 +42,11 @@ DEBUG_SUFFIX=$(findstring _debug,$(call cross_cfg,_$(cfg)))
 #------------------------------------------------------------
 
 VPATH = $(tbb_root)/src/tbb/$(ASSEMBLY_SOURCE) $(tbb_root)/src/tbb $(tbb_root)/src/rml/client $(tbb_root)/src/old $(tbb_root)/src/test $(tbb_root)/src/perf
+TEST_USES_TBB=1 # it can be set to 0 for individual tests that do not depend on TBB
+CPLUS_FLAGS += $(if $(crosstest),$(DEFINE_KEY)__TBB_NO_IMPLICIT_LINKAGE=1) $(DEFINE_KEY)TEST_USES_TBB=$(TEST_USES_TBB)
 
-CPLUS_FLAGS += $(if $(crosstest),$(DEFINE_KEY)__TBB_NO_IMPLICIT_LINKAGE=1)
 ifdef use_proxy
-    USE_PROXY_FLAG = $(DEFINE_KEY)HARNESS_USE_PROXY
+    USE_PROXY_FLAG = $(DEFINE_KEY)HARNESS_USE_RUNTIME_LOADER
     CPLUS_FLAGS += $(USE_PROXY_FLAG)
     LINK_TBB.LIB = $(PROXY.LIB)
     LIBS += $(LIBDL)
@@ -53,19 +54,28 @@ endif
 
 include $(tbb_root)/build/common_rules.inc
 
+#$(1) - is the binary name
+#$(2) - is the input obj files 
+#$(3) - extra libs to link with
+define make-test-binary 
+	$(CPLUS) $(OUTPUT_KEY)$(strip $1) $(CPLUS_FLAGS) $(2) $(LINK_TBB.LIB) $(LIBS) $(3) $(LINK_FLAGS)
+endef 
+
 # Rule for generating executable test
 %.$(TEST_EXT): %.$(OBJ) $(TBB.LIB) $(if $(use_proxy),$(LINK_TBB.LIB))
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $< $(LINK_TBB.LIB) $(LIBS) $(AUX_LIBS) $(LINK_FLAGS)
+	$(call make-test-binary,$@,$<,$(AUX_LIBS)) 
 
 # Rules for generating a test DLL
+%_dll.$(DLL): LINK_FLAGS += $(DYLIB_KEY)
+%_dll.$(DLL): CPLUS_FLAGS += $(PIC_KEY)
 %_dll.$(DLL): %_dll.$(OBJ) $(TBB.LIB)
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $(PIC_KEY) $< $(LINK_TBB.LIB) $(LIBS) $(LINK_FLAGS) $(DYLIB_KEY)
+	$(call make-test-binary,$@,$<,)
 .PRECIOUS: %_dll.$(OBJ) %_dll.$(DLL)
 
 # Rules for the tests, which use TBB in a dynamically loadable library
 test_model_plugin.$(TEST_EXT): CPLUS_FLAGS := $(CPLUS_FLAGS:$(USE_PROXY_FLAG)=)
-test_model_plugin.$(TEST_EXT): test_model_plugin.$(OBJ) test_model_plugin_dll.$(DLL)
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $< $(LIBDL) $(LIBS) $(LINK_FLAGS)
+test_model_plugin.$(TEST_EXT): LIBS += $(LIBDL)
+test_model_plugin.$(TEST_EXT): test_model_plugin_dll.$(DLL)
 
 # tbb_misc.$(OBJ) has to be specified here (instead of harness_inject_scheduler.h) because it carries dependency on version_string.ver
 SCHEDULER_DEPENDENCIES = $(TBB_ASM.OBJ) tbb_misc.$(OBJ)
@@ -79,18 +89,20 @@ SCHEDULER_DIRECTLY_INCLUDED = test_task_leaks.$(TEST_EXT) \
 INCLUDES += $(INCLUDE_KEY).
 
 $(SCHEDULER_DIRECTLY_INCLUDED): WARNING_KEY += $(WARNING_SUPPRESS)
-
+$(SCHEDULER_DIRECTLY_INCLUDED): LIBS += $(LIBDL)
+#tbb.lib must not be linked to scheduler white box tests in order to not violate ODR 
+$(SCHEDULER_DIRECTLY_INCLUDED): LINK_TBB.LIB = 
 $(SCHEDULER_DIRECTLY_INCLUDED): %.$(TEST_EXT) : %.$(OBJ) $(SCHEDULER_DEPENDENCIES)
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $^ $(LIBDL) $(LIBS) $(LINK_FLAGS)
+	$(call make-test-binary,$@,$^,)
 
 # Tests that use some features of C++11
 TEST_TBB_CPP11 = test_lambda.$(TEST_EXT) test_cache_aligned_allocator_STL.$(TEST_EXT)
 
 ifneq (0,$(cpp0x))
-$(TEST_TBB_CPP11:%.$(TEST_EXT)=%.$(OBJ)): %.$(OBJ): %.cpp
-	$(CPLUS) $(COMPILE_ONLY) $(CPLUS_FLAGS) $(CPP11_FLAGS) $(CXX_ONLY_FLAGS) $(CXX_WARN_SUPPRESS) $(INCLUDES) $<
-$(TEST_TBB_CPP11): %.$(TEST_EXT): %.$(OBJ) $(TBB.LIB) $(if $(use_proxy),$(LINK_TBB.LIB))
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $(CPP11_FLAGS) $< $(LINK_TBB.LIB) $(LIBS) $(AUX_LIBS) $(LINK_FLAGS)
+# Made CPP11 tests use NOSTRICT flags because -strict-ansi combined with
+# -std=c++0x on ICC 13.0 results in a compile error when stdlib is included
+$(TEST_TBB_CPP11):  CPLUS_FLAGS += $(CPP11_FLAGS)
+$(TEST_TBB_CPP11):  CPLUS_FLAGS := $(CPLUS_FLAGS_NOSTRICT)
 endif
 
 # test_tbb_header detects "multiple definition" linker error using the test that covers the whole library
@@ -98,20 +110,25 @@ TWICE_LINKED_TESTS = test_tbb_header.$(TEST_EXT) \
                      test_concurrent_unordered.$(TEST_EXT)
 
 %_secondary.$(OBJ): CPLUS_FLAGS+=$(DEFINE_KEY)__TBB_TEST_SECONDARY=1
+#todo unify all the rules that change name of the obj file
 %_secondary.$(OBJ): %.cpp
-	$(CPLUS) $(OUTPUTOBJ_KEY)$@ $(COMPILE_ONLY) $(CPLUS_FLAGS) $(CXX_ONLY_FLAGS) $(CXX_WARN_SUPPRESS) $(INCLUDES) $< 
+	$(call make-cxx-obj,$@,$<,)
 
+# Detecting "multiple definition" linker error using the test that covers the whole library
 $(TWICE_LINKED_TESTS): %.$(TEST_EXT): %.$(OBJ) %_secondary.$(OBJ) $(TBB.LIB)
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $*.$(OBJ) $*_secondary.$(OBJ) $(LINK_TBB.LIB) $(LIBS) $(AUX_LIBS) $(LINK_FLAGS)
+	$(call make-test-binary,$@, $*.$(OBJ) $*_secondary.$(OBJ),$(AUX_LIBS))	
 
 # Checks that TBB atomics work correctly in position independent code
-test_atomic_pic.$(TEST_EXT): test_atomic.cpp
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $(PIC_KEY) $(CXX_ONLY_FLAGS) $(CXX_WARN_SUPPRESS) $(INCLUDES) $(DEFINE_KEY)__TBB_TEST_PIC=1 $< $(LINK_TBB.LIB) $(LIBS) $(AUX_LIBS) $(LINK_FLAGS)
+test_atomic_pic.$(OBJ): CPLUS_FLAGS+=$(PIC_KEY)
+test_atomic_pic.$(OBJ): CPLUS_FLAGS+=$(DEFINE_KEY)__TBB_TEST_PIC=1
+test_atomic_pic.$(OBJ): test_atomic.cpp
+	$(call make-cxx-obj,$@,$<,)
 
 #Test of generic gcc port and icc intrinsics port
-%_compiler_builtins.$(TEST_EXT): CPLUS_FLAGS+=$(DEFINE_KEY)__TBB_TEST_BUILTINS=1
-%_compiler_builtins.$(TEST_EXT): %.cpp
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $(CXX_ONLY_FLAGS) $(CXX_WARN_SUPPRESS) $(INCLUDES) $< $(LIBS) $(AUX_LIBS) $(LINK_FLAGS)
+%_compiler_builtins.$(OBJ): CPLUS_FLAGS+=$(DEFINE_KEY)__TBB_TEST_BUILTINS=1
+%_compiler_builtins.$(OBJ): %.cpp
+	$(call make-cxx-obj,$@,$<,)
+
 
 # The main list of TBB tests
 TEST_TBB_PLAIN.EXE = test_assembly.$(TEST_EXT)   \
@@ -202,8 +219,8 @@ TEST_TBB_PLAIN.EXE = test_assembly.$(TEST_EXT)   \
 TEST_TBB_PLAIN.EXE += $(TEST_TBB_CPP11)
 
 ifdef OPENMP_FLAG
-test_openmp.$(TEST_EXT): test_openmp.cpp
-	$(CPLUS) $(OPENMP_FLAG) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $(INCLUDES) $< $(LIBS) $(LINK_TBB.LIB) $(LINK_FLAGS)
+test_openmp.$(TEST_EXT): CPLUS_FLAGS += $(OPENMP_FLAG)
+
 test_tbb_openmp: $(TEST_PREREQUISITE) test_openmp.$(TEST_EXT)
 	$(run_cmd) ./test_openmp.$(TEST_EXT) 1:4
 else
@@ -212,8 +229,9 @@ test_tbb_openmp:
 endif
 
 ifdef CILK_AVAILABLE
-test_cilk_dynamic_load.$(TEST_EXT): test_cilk_dynamic_load.$(OBJ) test_cilk_dynamic_load_dll.$(DLL)
-	$(CPLUS) $(OUTPUT_KEY)$@ $(CPLUS_FLAGS) $< $(LINK_TBB.LIB) $(LIBDL) $(LIBS) $(LINK_FLAGS)
+test_cilk_dynamic_load.$(TEST_EXT): LIBS += $(LIBDL)
+test_cilk_dynamic_load.$(TEST_EXT): test_cilk_dynamic_load_dll.$(DLL)
+
 # Workaround on cilkrts linkage known issue (see Intel(R) C++ Composer XE 2011 Release Notes)
 # The issue reveals itself if a version of binutils is prior to 2.17
 ifeq (linux_icc,$(tbb_os)_$(compiler))
@@ -224,7 +242,7 @@ test_tbb_cilk: test_cilk_interop.$(TEST_EXT) test_cilk_dynamic_load.$(TEST_EXT)
 	$(run_cmd) ./test_cilk_dynamic_load.$(TEST_EXT) $(args)
 else
 test_tbb_cilk:
-	@echo "Intel(R) Cilk Plus is not available"
+	@echo "Intel(R) Cilk(TM) Plus is not available"
 endif
 
 $(TEST_TBB_PLAIN.EXE): WARNING_KEY += $(TEST_WARNING_KEY)
@@ -293,7 +311,7 @@ test_tbb_plain: $(TEST_PREREQUISITE) $(SCHEDULER_DIRECTLY_INCLUDED) $(TEST_TBB_P
 	$(run_cmd) ./test_semaphore.$(TEST_EXT) $(args) 1:4
 	$(run_cmd) ./test_reader_writer_lock.$(TEST_EXT) $(args) 1:4
 	$(run_cmd) ./test_tbb_condition_variable.$(TEST_EXT) $(args) 1:4
-	$(run_cmd) ./test_tbb_fork.$(TEST_EXT)
+	$(run_cmd) ./test_tbb_fork.$(TEST_EXT) $(args)
 	$(run_cmd) ./test_intrusive_list.$(TEST_EXT) $(args)
 	$(run_cmd) ./test_concurrent_priority_queue.$(TEST_EXT) $(args) 1:4
 	$(run_cmd) ./test_task_priority.$(TEST_EXT) $(args)
@@ -334,11 +352,11 @@ TEST_TBB_DEPRECATED.OBJ = test_concurrent_queue_deprecated.$(OBJ) \
 
 
 # For deprecated files, we don't mind warnings etc., thus compilation rules are most relaxed
-$(TEST_TBB_OLD.OBJ): %.$(OBJ): %.cpp
-	$(CPLUS) $(COMPILE_ONLY) $(CPLUS_FLAGS_DEPRECATED) $(CXX_ONLY_FLAGS) $(INCLUDES) $<
+$(TEST_TBB_OLD.OBJ): CPLUS_FLAGS := $(CPLUS_FLAGS_DEPRECATED)
 
+%_deprecated.$(OBJ): CPLUS_FLAGS := $(CPLUS_FLAGS_DEPRECATED)
 %_deprecated.$(OBJ): %.cpp
-	$(CPLUS) $(COMPILE_ONLY) $(OUTPUTOBJ_KEY)$@ $(CPLUS_FLAGS_DEPRECATED) $(CXX_ONLY_FLAGS) $(INCLUDES) $<
+	$(call make-cxx-obj,$@, $<,)
 .PRECIOUS: %_deprecated.$(OBJ)
 
 TEST_TBB_OLD.EXE = $(subst .$(OBJ),.$(TEST_EXT),$(TEST_TBB_OLD.OBJ) $(TEST_TBB_DEPRECATED.OBJ))
diff --git a/build/android.gcc.inc b/build/android.gcc.inc
index 682ae2e..dc46a29 100644
--- a/build/android.gcc.inc
+++ b/build/android.gcc.inc
@@ -49,7 +49,7 @@ CONLY = $(tbb_tool_prefix)gcc
 # -soname is necessary for proper linkage to TBB prebuilt libraries when building application with Android SDK
 LIB_LINK_FLAGS = $(DYLIB_KEY) -Wl,-soname=$(BUILDING_LIBRARY)
 
-LINK_FLAGS = -Wl,-rpath-link=.
+LINK_FLAGS = -Wl,-rpath-link=. -rdynamic
 C_FLAGS = $(CPLUS_FLAGS)
 
 # gcc 4.4 and higher support C++11
diff --git a/build/common_rules.inc b/build/common_rules.inc
index 340c6c7..f00af86 100644
--- a/build/common_rules.inc
+++ b/build/common_rules.inc
@@ -58,9 +58,20 @@ endif
 
 CONLY ?= $(CPLUS)
 
+#$(1) - is the obj name
+#$(2) - is the input cpp files 
+#$(3) - extra flags to compile with
+define make-cxx-obj 
+	$(CPLUS) $(OUTPUTOBJ_KEY)$(strip $1) $(COMPILE_ONLY) $(CPLUS_FLAGS) $(CXX_ONLY_FLAGS) $(CXX_WARN_SUPPRESS) $(INCLUDES) $3 $2
+endef
+
 # The most generic rules
 %.$(OBJ): %.cpp
-	$(CPLUS) $(COMPILE_ONLY) $(CPLUS_FLAGS) $(CXX_ONLY_FLAGS) $(CXX_WARN_SUPPRESS) $(INCLUDES) $<
+	$(call make-cxx-obj,$@, $<, )
+
+# Rules for generating a test DLL
+%_dll.$(OBJ): %.cpp
+	$(CPLUS) $(COMPILE_ONLY) $(OUTPUTOBJ_KEY)$@ $(CPLUS_FLAGS) $(PIC_KEY) $(DEFINE_KEY)_USRDLL $(INCLUDES) $<
 
 %.$(OBJ): %.c
 	$(CONLY) $(COMPILE_ONLY) $(C_FLAGS) $(INCLUDES) $<
@@ -89,7 +100,7 @@ CONLY ?= $(CPLUS)
 
 ifeq (1,$(TBB_NOSTRICT))
 # GNU 3.2.3 headers have a ISO syntax that is rejected by Intel compiler in -strict-ansi mode.
-# The Mac uses gcc, so the list is empty for that platform.
+# The OS X* uses gcc, so the list is empty for that platform.
 # The files below need the -strict-ansi flag downgraded to -ansi to compile
 
 $(KNOWN_NOSTRICT): %.$(OBJ): %.cpp
@@ -123,7 +134,3 @@ version_string.ver:
 	$(MAKE_VERSIONS)
 endif
 
-
-# Rules for generating a test DLL
-%_dll.$(OBJ): %.cpp
-	$(CPLUS) $(COMPILE_ONLY) $(OUTPUTOBJ_KEY)$@ $(CPLUS_FLAGS) $(PIC_KEY) $(DEFINE_KEY)_USRDLL $(INCLUDES) $<
diff --git a/build/generate_tbbvars.sh b/build/generate_tbbvars.sh
index 227619f..3a83feb 100644
--- a/build/generate_tbbvars.sh
+++ b/build/generate_tbbvars.sh
@@ -31,7 +31,6 @@ bin_dir="$PWD"  #
 cd "$tbb_root"  # keep this comments here
 tbb_root="$PWD" # to make it unsensible
 cd "$bin_dir"   # to EOL encoding
-[ "`uname`" = "Darwin" ] && dll_path="DYLD_LIBRARY_PATH" || dll_path="LD_LIBRARY_PATH" #
 [ -f ./tbbvars.sh ] || cat >./tbbvars.sh <<EOF
 #!/bin/bash
 export TBBROOT="${tbb_root}" #
@@ -41,15 +40,15 @@ if [ -z "\$CPATH" ]; then #
 else #
     export CPATH="\${TBBROOT}/include:\$CPATH" #
 fi #
-if [ -z "\$LIBRARY_PATH" ]; then #
-    export LIBRARY_PATH="\${tbb_bin}" #
+if [ -z "\$${2}LIBRARY_PATH" ]; then #
+    export ${2}LIBRARY_PATH="\${tbb_bin}" #
 else #
-    export LIBRARY_PATH="\${tbb_bin}:\$LIBRARY_PATH" #
+    export ${2}LIBRARY_PATH="\${tbb_bin}:\$${2}LIBRARY_PATH" #
 fi #
-if [ -z "\$${dll_path}" ]; then #
-    export ${dll_path}="\${tbb_bin}" #
+if [ -z "\$${1}LD_LIBRARY_PATH" ]; then #
+    export ${1}LD_LIBRARY_PATH="\${tbb_bin}" #
 else #
-    export ${dll_path}="\${tbb_bin}:\$${dll_path}" #
+    export ${1}LD_LIBRARY_PATH="\${tbb_bin}:\$${1}LD_LIBRARY_PATH" #
 fi #
 ${TBB_CUSTOM_VARS_SH} #
 EOF
@@ -62,15 +61,15 @@ if (! \$?CPATH) then #
 else #
     setenv CPATH "\${TBBROOT}/include:\$CPATH" #
 endif #
-if (! \$?LIBRARY_PATH) then #
-    setenv LIBRARY_PATH "\${tbb_bin}" #
+if (! \$?${2}LIBRARY_PATH) then #
+    setenv ${2}LIBRARY_PATH "\${tbb_bin}" #
 else #
-    setenv LIBRARY_PATH "\${tbb_bin}:\$LIBRARY_PATH" #
+    setenv ${2}LIBRARY_PATH "\${tbb_bin}:\$${2}LIBRARY_PATH" #
 endif #
-if (! \$?${dll_path}) then #
-    setenv ${dll_path} "\${tbb_bin}" #
+if (! \$?${1}LD_LIBRARY_PATH) then #
+    setenv ${1}LD_LIBRARY_PATH "\${tbb_bin}" #
 else #
-    setenv ${dll_path} "\${tbb_bin}:\$${dll_path}" #
+    setenv ${1}LD_LIBRARY_PATH "\${tbb_bin}:\$${1}LD_LIBRARY_PATH" #
 endif #
 ${TBB_CUSTOM_VARS_CSH} #
 EOF
diff --git a/build/linux.gcc.inc b/build/linux.gcc.inc
index 2e4a038..ddecbb5 100644
--- a/build/linux.gcc.inc
+++ b/build/linux.gcc.inc
@@ -46,7 +46,7 @@ CPLUS = g++
 CONLY = gcc
 LIB_LINK_FLAGS = $(DYLIB_KEY) -Wl,-soname=$(BUILDING_LIBRARY)
 LIBS += -lpthread -lrt
-LINK_FLAGS = -Wl,-rpath-link=.
+LINK_FLAGS = -Wl,-rpath-link=. -rdynamic
 C_FLAGS = $(CPLUS_FLAGS)
 # gcc 4.4 and higher support -std=c++0x
 ifneq (,$(shell gcc -dumpversion | egrep  "^(4\.[4-9]|[5-9])"))
@@ -78,7 +78,7 @@ TBB_ASM.OBJ=
 MALLOC_ASM.OBJ=
 
 ifeq (ia64,$(arch))
-# Position-independent code (PIC) is a must on IA-64, even for regular (not shared) executables
+# Position-independent code (PIC) is a must on IA-64 architecture, even for regular (not shared) executables
     CPLUS_FLAGS += $(PIC_KEY)
 endif
 
diff --git a/build/linux.icc.inc b/build/linux.icc.inc
index d3452eb..224e3e4 100644
--- a/build/linux.icc.inc
+++ b/build/linux.icc.inc
@@ -53,13 +53,14 @@ endif
 OPENMP_FLAG = -openmp
 LIB_LINK_FLAGS = -shared -i-static -Wl,-soname=$(BUILDING_LIBRARY)
 LIBS += -lpthread -lrt
+LINK_FLAGS = -rdynamic
 C_FLAGS = $(CPLUS_FLAGS)
 # ICC 11.0 and higher support -std=c++0x
 ifneq (,$(shell icc -dumpversion | egrep  "^1[1-9]\."))
     CPP11_FLAGS = -std=c++0x -D_TBB_CPP0X
 endif
 
-# ICC 12.0 and higher provide Intel(R) Cilk Plus
+# ICC 12.0 and higher provide Intel(R) Cilk(TM) Plus
 ifneq (,$(shell icc -dumpversion | egrep  "^1[2-9]\."))
     CILK_AVAILABLE = yes
 endif
@@ -74,7 +75,7 @@ endif
 
 ifeq (ia64,$(arch))
     ITT_NOTIFY =
-# Position-independent code (PIC) is a must on IA-64, even for regular (not shared) executables
+# Position-independent code (PIC) is a must on IA-64 architecture, even for regular (not shared) executables
 # strict-ansi does not work with <signal.h> on RHEL 4 AS
     CPLUS_FLAGS += $(PIC_KEY) $(if $(findstring cc3.,$(runtime)),-ansi,-strict-ansi)
 else
diff --git a/build/macos.icc.inc b/build/macos.icc.inc
index da8baea..8ce49df 100644
--- a/build/macos.icc.inc
+++ b/build/macos.icc.inc
@@ -49,7 +49,7 @@ ifneq (,$(shell icc -dumpversion | egrep  "^1[1-9]\."))
     CPP11_FLAGS = -std=c++0x -D_TBB_CPP0X
 endif
 
-# ICC 12.0 and higher provide Intel(R) Cilk Plus
+# ICC 12.0 and higher provide Intel(R) Cilk(TM) Plus
 ifneq (,$(shell icc -dumpversion | egrep  "^1[2-9]\."))
     CILK_AVAILABLE = yes
 endif
diff --git a/build/macos.inc b/build/macos.inc
index ef9248b..3a772e5 100644
--- a/build/macos.inc
+++ b/build/macos.inc
@@ -85,7 +85,7 @@ MD?=mkdir -p
 NUL= /dev/null
 SLASH=/
 MAKE_VERSIONS=sh $(tbb_root)/build/version_info_macos.sh $(CPLUS) $(CPLUS_FLAGS) $(INCLUDES) >version_string.ver
-MAKE_TBBVARS=sh $(tbb_root)/build/generate_tbbvars.sh
+MAKE_TBBVARS=sh $(tbb_root)/build/generate_tbbvars.sh DY
 
 ifdef DYLD_LIBRARY_PATH
         export DYLD_LIBRARY_PATH := .:$(DYLD_LIBRARY_PATH)
diff --git a/build/mic.icc.inc b/build/mic.icc.inc
index ccae7b6..e63c833 100644
--- a/build/mic.icc.inc
+++ b/build/mic.icc.inc
@@ -70,7 +70,7 @@ endif
 CPLUS_FLAGS += -DHARNESS_INCOMPLETE_SOURCES=1 -D__TBB_MIC_NATIVE -DTBB_USE_EXCEPTIONS=0 -opt-streaming-stores never
 CPLUS += -mmic
 CONLY += -mmic
-LINK_FLAGS = -Wl,-rpath-link=.
+LINK_FLAGS = -Wl,-rpath-link=. -rdynamic
 # Tell the icc to not link against libcilk*. Otherwise icc tries to link and emits a warning message.
 LIB_LINK_LIBS += -no-intel-extensions
 # Do not depend on libirc etc dynamic libs. It makes 'native' execution easier for the users.
diff --git a/build/mic.linux.inc b/build/mic.linux.inc
index 87a1b02..245c6db 100644
--- a/build/mic.linux.inc
+++ b/build/mic.linux.inc
@@ -34,7 +34,7 @@ ifneq ($(BUILDING_PHASE),1)
 endif
 
 MAKE_VERSIONS=sh $(tbb_root)/build/version_info_linux.sh $(CPLUS) $(CPLUS_FLAGS) $(INCLUDES) >version_string.ver
-MAKE_TBBVARS=sh $(tbb_root)/build/generate_tbbvars.sh
+MAKE_TBBVARS=sh $(tbb_root)/build/generate_tbbvars.sh MIC_ MIC_
 def_prefix=lin64
 
 TEST_LAUNCHER=
diff --git a/build/windows.icl.inc b/build/windows.icl.inc
index ccdb7d1..ae12dc7 100644
--- a/build/windows.icl.inc
+++ b/build/windows.icl.inc
@@ -55,7 +55,7 @@ ifeq (ok,$(call detect_js,/minversion icl 11))
     CPP11_FLAGS = /Qstd=c++0x /D_TBB_CPP0X
 endif
 
-# ICC 12.0 and higher provide Intel(R) Cilk Plus
+# ICC 12.0 and higher provide Intel(R) Cilk(TM) Plus
 ifeq (ok,$(call detect_js,/minversion icl 12))
     CILK_AVAILABLE = yes
 endif
diff --git a/build/windows.inc b/build/windows.inc
index c06f128..013f4f5 100644
--- a/build/windows.inc
+++ b/build/windows.inc
@@ -68,9 +68,10 @@ ASMEXT = asm
 
 def_prefix = $(if $(findstring intel64,$(arch)),win64,win32)
 
-# Target Windows version. Do not increase beyond 0x0501 without prior discussion!
+# Target Windows version. Do not increase beyond 0x0502 without prior discussion!
 # Used as the value for macro definition opiton in windows.cl.inc etc.
-_WIN32_WINNT=0x0501
+# For tests, we need at least Windows XP SP2 for sake of enabling stack backtraces.
+_WIN32_WINNT=0x0502
 
 TBB.LST = $(tbb_root)/src/tbb/$(def_prefix)-tbb-export.lst
 TBB.DEF = $(TBB.LST:.lst=.def)
diff --git a/doc/html/a00031.html b/doc/html/a00031.html
index 4a18eaf..5a817a5 100644
--- a/doc/html/a00031.html
+++ b/doc/html/a00031.html
@@ -22,10 +22,13 @@
   </ul></div>
 <h1>tbb::interface5::concurrent_priority_queue< T, Compare, A > Member List</h1>This is the complete list of members for <a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, including all inherited members.<p><table>
   <tr class="memlist"><td><a class="el" href="a00285.html#1712cb3a46bc1821fccc5e2cd83d5cd7">allocator_type</a> typedef</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td></td></tr>
+  <tr class="memlist"><td><a class="el" href="a00285.html#e89e38c09b212a3d39d6e36a5ee33fe8">assign</a>(InputIterator begin, InputIterator end)</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
+  <tr class="memlist"><td><a class="el" href="a00285.html#38dc06a3143eefe5697b8cd157e0d00a">assign</a>(std::initializer_list< T > const &il)</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#0bdcdf7cde9fd369edca845bec34ca94">clear</a>()</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#eefa40599afe00ea393897d8f5662e65">concurrent_priority_queue</a>(const allocator_type &a=allocator_type())</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline, explicit]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#4555b4a55415a70024c4004b51e9f385">concurrent_priority_queue</a>(size_type init_capacity, const allocator_type &a=allocator_type())</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline, explicit]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#9147cb4207017c260a0c3929c12cd40f">concurrent_priority_queue</a>(InputIterator begin, InputIterator end, const allocator_type &a=allocator_type())</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
+  <tr class="memlist"><td><a class="el" href="a00285.html#36fc4734032fff2e98bc9f21b73ab1be">concurrent_priority_queue</a>(std::initializer_list< T > const &init_list, const allocator_type &a=allocator_type())</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#509419e320f200456d89dc54a65140b3">concurrent_priority_queue</a>(const concurrent_priority_queue &src)</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline, explicit]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#c8b20e7430c5302936030bef59a562be">concurrent_priority_queue</a>(const concurrent_priority_queue &src, const allocator_type &a)</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#a4ded8601a434098605be0dcc4febc60">const_reference</a> typedef</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td></td></tr>
@@ -33,6 +36,7 @@
   <tr class="memlist"><td><a class="el" href="a00285.html#317c508fa92df218be5d014c26c09bb7">empty</a>() const </td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#d545d444fb0d16148f9b61fd89f9a337">get_allocator</a>() const </td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#2ab7f7808891027ac0f0f5b3a4be51e9">operator=</a>(const concurrent_priority_queue &src)</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
+  <tr class="memlist"><td><a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">operator=</a>(std::initializer_list< T > const &il)</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#d905af7b8f6defff562f5ae9c3275763">push</a>(const_reference elem)</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#5804b3c708ef4e50d603f918ef2b9e58">reference</a> typedef</td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td></td></tr>
   <tr class="memlist"><td><a class="el" href="a00285.html#8b2ae25c61338c6fd59e94fe09822ba5">size</a>() const </td><td><a class="el" href="a00285.html">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a></td><td><code> [inline]</code></td></tr>
diff --git a/doc/html/a00169.html b/doc/html/a00169.html
index 95f54e8..2ca47d4 100644
--- a/doc/html/a00169.html
+++ b/doc/html/a00169.html
@@ -22,6 +22,7 @@
   </ul></div>
 <h1>tbb::internal::atomic_backoff Member List</h1>This is the complete list of members for <a class="el" href="a00268.html">tbb::internal::atomic_backoff</a>, including all inherited members.<p><table>
   <tr bgcolor="#f0f0f0"><td><b>atomic_backoff</b>() (defined in <a class="el" href="a00268.html">tbb::internal::atomic_backoff</a>)</td><td><a class="el" href="a00268.html">tbb::internal::atomic_backoff</a></td><td><code> [inline]</code></td></tr>
+  <tr bgcolor="#f0f0f0"><td><b>atomic_backoff</b>(bool) (defined in <a class="el" href="a00268.html">tbb::internal::atomic_backoff</a>)</td><td><a class="el" href="a00268.html">tbb::internal::atomic_backoff</a></td><td><code> [inline]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>bounded_pause</b>() (defined in <a class="el" href="a00268.html">tbb::internal::atomic_backoff</a>)</td><td><a class="el" href="a00268.html">tbb::internal::atomic_backoff</a></td><td><code> [inline]</code></td></tr>
   <tr class="memlist"><td><a class="el" href="a00268.html#a174ea93e3bd3d5cce82389c2f28d037">pause</a>()</td><td><a class="el" href="a00268.html">tbb::internal::atomic_backoff</a></td><td><code> [inline]</code></td></tr>
   <tr bgcolor="#f0f0f0"><td><b>reset</b>() (defined in <a class="el" href="a00268.html">tbb::internal::atomic_backoff</a>)</td><td><a class="el" href="a00268.html">tbb::internal::atomic_backoff</a></td><td><code> [inline]</code></td></tr>
diff --git a/doc/html/a00268.html b/doc/html/a00268.html
index ee69194..e626caa 100644
--- a/doc/html/a00268.html
+++ b/doc/html/a00268.html
@@ -30,6 +30,9 @@
 <a href="a00169.html">List of all members.</a><table border="0" cellpadding="0" cellspacing="0">
 <tr><td></td></tr>
 <tr><td colspan="2"><br><h2>Public Member Functions</h2></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="35029ddea6eb855b756292d1089054bb"></a><!-- doxytag: member="tbb::internal::atomic_backoff::atomic_backoff" ref="35029ddea6eb855b756292d1089054bb" args="(bool)" -->
+ </td><td class="memItemRight" valign="bottom"><b>atomic_backoff</b> (bool)</td></tr>
+
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="a174ea93e3bd3d5cce82389c2f28d037"></a><!-- doxytag: member="tbb::internal::atomic_backoff::pause" ref="a174ea93e3bd3d5cce82389c2f28d037" args="()" -->
 void </td><td class="memItemRight" valign="bottom"><a class="el" href="a00268.html#a174ea93e3bd3d5cce82389c2f28d037">pause</a> ()</td></tr>
 
diff --git a/doc/html/a00285.html b/doc/html/a00285.html
index 21fcc55..7e89a2c 100644
--- a/doc/html/a00285.html
+++ b/doc/html/a00285.html
@@ -68,6 +68,10 @@ template<typename InputIterator> </td></tr>
 <tr><td class="memTemplItemLeft" nowrap align="right" valign="top"> </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="a00285.html#9147cb4207017c260a0c3929c12cd40f">concurrent_priority_queue</a> (InputIterator begin, InputIterator end, const <a class="el" href="a00285.html#1712cb3a46bc1821fccc5e2cd83d5cd7">allocator_type</a> &a=<a class="el" href="a00285.html#1712cb3a46bc1821fccc5e2cd83d5cd7">allocator_type</a>())</td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">[begin,end) constructor <br></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="36fc4734032fff2e98bc9f21b73ab1be"></a><!-- doxytag: member="tbb::interface5::concurrent_priority_queue::concurrent_priority_queue" ref="36fc4734032fff2e98bc9f21b73ab1be" args="(std::initializer_list< T > const &init_list, const allocator_type &a=allocator_type())" -->
+ </td><td class="memItemRight" valign="bottom"><a class="el" href="a00285.html#36fc4734032fff2e98bc9f21b73ab1be">concurrent_priority_queue</a> (std::initializer_list< T > const &init_list, const <a class="el" href="a00285.html#1712cb3a46bc1821fccc5e2cd83d5cd7">allocator_type</a> &a=<a class="el" href="a00285.html#1712cb3a46bc1821fccc5e2cd83d5cd7">allocator_type</a>())</td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">Constructor from std::initializer_list. <br></td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top"> </td><td class="memItemRight" valign="bottom"><a class="el" href="a00285.html#509419e320f200456d89dc54a65140b3">concurrent_priority_queue</a> (const <a class="el" href="a00285.html">concurrent_priority_queue</a> &src)</td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Copy constructor.  <a href="#509419e320f200456d89dc54a65140b3"></a><br></td></tr>
@@ -77,6 +81,19 @@ template<typename InputIterator> </td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="el" href="a00285.html">concurrent_priority_queue</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="a00285.html#2ab7f7808891027ac0f0f5b3a4be51e9">operator=</a> (const <a class="el" href="a00285.html">concurrent_priority_queue</a> &src)</td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Assignment operator.  <a href="#2ab7f7808891027ac0f0f5b3a4be51e9"></a><br></td></tr>
+<tr><td class="memTemplParams" nowrap colspan="2"><a class="anchor" name="e89e38c09b212a3d39d6e36a5ee33fe8"></a><!-- doxytag: member="tbb::interface5::concurrent_priority_queue::assign" ref="e89e38c09b212a3d39d6e36a5ee33fe8" args="(InputIterator begin, InputIterator end)" -->
+template<typename InputIterator> </td></tr>
+<tr><td class="memTemplItemLeft" nowrap align="right" valign="top">void </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="a00285.html#e89e38c09b212a3d39d6e36a5ee33fe8">assign</a> (InputIterator begin, InputIterator end)</td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">Assign the queue from [begin,end) range, not thread-safe. <br></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="38dc06a3143eefe5697b8cd157e0d00a"></a><!-- doxytag: member="tbb::interface5::concurrent_priority_queue::assign" ref="38dc06a3143eefe5697b8cd157e0d00a" args="(std::initializer_list< T > const &il)" -->
+void </td><td class="memItemRight" valign="bottom"><a class="el" href="a00285.html#38dc06a3143eefe5697b8cd157e0d00a">assign</a> (std::initializer_list< T > const &il)</td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">Assign the queue from std::initializer_list, not thread-safe. <br></td></tr>
+<tr><td class="memItemLeft" nowrap align="right" valign="top"><a class="anchor" name="9296c2eaeeae24cb7019659c2fdf0f62"></a><!-- doxytag: member="tbb::interface5::concurrent_priority_queue::operator=" ref="9296c2eaeeae24cb7019659c2fdf0f62" args="(std::initializer_list< T > const &il)" -->
+<a class="el" href="a00285.html">concurrent_priority_queue</a> & </td><td class="memItemRight" valign="bottom"><a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">operator=</a> (std::initializer_list< T > const &il)</td></tr>
+
+<tr><td class="mdescLeft"> </td><td class="mdescRight">Assign from std::initializer_list, not thread-safe. <br></td></tr>
 <tr><td class="memItemLeft" nowrap align="right" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="a00285.html#317c508fa92df218be5d014c26c09bb7">empty</a> () const </td></tr>
 
 <tr><td class="mdescLeft"> </td><td class="mdescRight">Returns true if empty, false otherwise.  <a href="#317c508fa92df218be5d014c26c09bb7"></a><br></td></tr>
diff --git a/doc/html/a00428.html b/doc/html/a00428.html
index 745d5ea..d72684f 100644
--- a/doc/html/a00428.html
+++ b/doc/html/a00428.html
@@ -606,7 +606,7 @@ __TBB_full_memory_fence() Must prevent all memory operations from being reordere
 __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand ) Must be provided if __TBB_USE_FENCED_ATOMICS is not set.<p>
 __TBB_machine_cmpswp8( volatile void *ptr, int32_t value, int64_t comparand ) Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMICS is not set, and for 32-bit architectures if __TBB_64BIT_ATOMICS is set<p>
 __TBB_machine_<op><S><fence>(...), where <op> = {cmpswp, fetchadd, fetchstore} <S> = {1, 2, 4, 8} <fence> = {full_fence, acquire, release, relaxed} Must be provided if __TBB_USE_FENCED_ATOMICS is set.<p>
-__TBB_control_consistency_helper() Bridges the memory-semantics gap between architectures providing only implicit C++0x "consume" semantics (like Power Architecture) and those also implicitly obeying control dependencies (like IA-64). It must be used only in conditional code where the condition is itself data-dependent, and will then make subsequent code behave as if the original data dependency were acquired. It needs only a compiler fence where implied by the architecture either specif [...]
+__TBB_control_consistency_helper() Bridges the memory-semantics gap between architectures providing only implicit C++0x "consume" semantics (like Power Architecture) and those also implicitly obeying control dependencies (like IA-64 architecture). It must be used only in conditional code where the condition is itself data-dependent, and will then make subsequent code behave as if the original data dependency were acquired. It needs only a compiler fence where implied by the architecture  [...]
 __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper() Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set. Enforce acquire and release semantics in generic implementations of fenced store and load operations. Depending on the particular architecture/compiler combination they may be a hardware fence, a compiler fence, both or nothing. 
 <p>
 <hr><h2>Enumeration Type Documentation</h2>
diff --git a/doc/html/functions_0x61.html b/doc/html/functions_0x61.html
index b82e961..69e14c5 100644
--- a/doc/html/functions_0x61.html
+++ b/doc/html/functions_0x61.html
@@ -76,7 +76,7 @@ Here is a list of all documented class members with links to the class documenta
 : <a class="el" href="a00349.html#8ccc518caf31075a3e073996d2d240a4">tbb::task</a><li>allocated
 : <a class="el" href="a00349.html#4a3c415562d17905390ea5b49d12293ebe94d3348dd038e41107819f00c1884c">tbb::task</a><li>allocator_type
 : <a class="el" href="a00293.html#3c03eb40955b933b01987222722ac4bd">tbb::interface6::enumerable_thread_specific< T, Allocator, ETS_key_type ></a>, <a class="el" href="a00280.html#2e2726fccf6d975dc1071608cc0bbf90">tbb::concurrent_bounded_queue< T, A ></a>, <a class="el" href="a00286.html#5a3956341728eaa558d8827063718cac">tbb::strict_ppl::concurrent_queue< T, A ></a>, <a class="el" href="a00285.html#1712cb3a46bc1821fccc5e2cd83d5cd7">tbb::interface5::concurrent_priority_qu [...]
-: <a class="el" href="a00288.html#c04d64fe86696a084afa117d34384b5f">tbb::concurrent_vector< T, A ></a><li>at()
+: <a class="el" href="a00288.html#c04d64fe86696a084afa117d34384b5f">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#38dc06a3143eefe5697b8cd157e0d00a">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a><li>at()
 : <a class="el" href="a00288.html#23e14a38af748edff96a7adc3a0f1c58">tbb::concurrent_vector< T, A ></a><li>automatic
 : <a class="el" href="a00355.html#8f5988e2b0fbb2d533fcbb7f2583743f">tbb::task_scheduler_init</a>, <a class="el" href="a00350.html#fa26370c094032900c1ed69d8e92f4e8">tbb::interface6::task_arena</a></ul>
 <hr>
diff --git a/doc/html/functions_0x6f.html b/doc/html/functions_0x6f.html
index acd86e7..35d5500 100644
--- a/doc/html/functions_0x6f.html
+++ b/doc/html/functions_0x6f.html
@@ -72,7 +72,7 @@ Here is a list of all documented class members with links to the class documenta
 : <a class="el" href="a00362.html#09dde78a4100800c11bb883d6204b586">tbb::tick_count</a>, <a class="el" href="a00363.html#fa509691e1d689830931e36edd274f76">tbb::tick_count::interval_t</a><li>operator-=()
 : <a class="el" href="a00363.html#35ff7eaf7c2031b4a991402ac9ecb940">tbb::tick_count::interval_t</a><li>operator->()
 : <a class="el" href="a00282.html#a807920cdffe3ec5c5e282b4d1ff92a2">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::accessor</a>, <a class="el" href="a00284.html#3d03a48ecb8cd9549bd8be64b09c9b0d">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::const_accessor</a><li>operator=()
-: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#2ab7f7808891027ac0f0f5b3a4be51e9">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
+: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
 : <a class="el" href="a00288.html#c6fade5c732cc95274d1d8277ea619d1">tbb::concurrent_vector< T, A ></a><li>output_type
 : <a class="el" href="a00304.html#6e67fc480147c0b88a483b85db6457b0">tbb::flow::interface6::limiter_node< T ></a>, <a class="el" href="a00324.html#2cb099b590246b6bc93cc15e78c6ee5c">tbb::flow::interface6::priority_queue_node< T, Compare, A ></a>, <a class="el" href="a00340.html#ca026eaef70e35791c407323199031a7">tbb::flow::interface6::sequencer_node< T, A ></a>, <a class="el" href="a00325.html#25b5a53ab1f9a342644fa3759bc0b1ad">tbb::flow::interface6::queue_node< T, A &gt [...]
 <hr>
diff --git a/doc/html/functions_func.html b/doc/html/functions_func.html
index 5e2f0d5..728ccd0 100644
--- a/doc/html/functions_func.html
+++ b/doc/html/functions_func.html
@@ -71,7 +71,7 @@
 : <a class="el" href="a00349.html#1ff794f7053cd9148d5f280fbf07377f">tbb::task</a><li>allocate_continuation()
 : <a class="el" href="a00349.html#1434c79a5138993269d034008bff7329">tbb::task</a><li>allocate_root()
 : <a class="el" href="a00349.html#8ccc518caf31075a3e073996d2d240a4">tbb::task</a><li>assign()
-: <a class="el" href="a00288.html#c04d64fe86696a084afa117d34384b5f">tbb::concurrent_vector< T, A ></a><li>at()
+: <a class="el" href="a00288.html#c04d64fe86696a084afa117d34384b5f">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#38dc06a3143eefe5697b8cd157e0d00a">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a><li>at()
 : <a class="el" href="a00288.html#23e14a38af748edff96a7adc3a0f1c58">tbb::concurrent_vector< T, A ></a></ul>
 <hr>
 <p></p>
diff --git a/doc/html/functions_func_0x6f.html b/doc/html/functions_func_0x6f.html
index ccad019..e77b355 100644
--- a/doc/html/functions_func_0x6f.html
+++ b/doc/html/functions_func_0x6f.html
@@ -68,7 +68,7 @@
 : <a class="el" href="a00363.html#cd9814947902e26463a69a111530f81b">tbb::tick_count::interval_t</a><li>operator-=()
 : <a class="el" href="a00363.html#35ff7eaf7c2031b4a991402ac9ecb940">tbb::tick_count::interval_t</a><li>operator->()
 : <a class="el" href="a00282.html#a807920cdffe3ec5c5e282b4d1ff92a2">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::accessor</a>, <a class="el" href="a00284.html#3d03a48ecb8cd9549bd8be64b09c9b0d">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator >::const_accessor</a><li>operator=()
-: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#2ab7f7808891027ac0f0f5b3a4be51e9">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
+: <a class="el" href="a00288.html#85cc876b1dec457b831b4745be274be1">tbb::concurrent_vector< T, A ></a>, <a class="el" href="a00285.html#9296c2eaeeae24cb7019659c2fdf0f62">tbb::interface5::concurrent_priority_queue< T, Compare, A ></a>, <a class="el" href="a00281.html#088d1aaccc816884a49e38f7065622c8">tbb::interface5::concurrent_hash_map< Key, T, HashCompare, Allocator ></a><li>operator[]()
 : <a class="el" href="a00288.html#c6fade5c732cc95274d1d8277ea619d1">tbb::concurrent_vector< T, A ></a></ul>
 <hr>
 <p></p>
diff --git a/examples/GettingStarted/sub_string_finder/index.html b/examples/GettingStarted/sub_string_finder/index.html
index 1015526..0877dcd 100644
--- a/examples/GettingStarted/sub_string_finder/index.html
+++ b/examples/GettingStarted/sub_string_finder/index.html
@@ -33,7 +33,7 @@ demonstrates offload programming for Intel&reg Many Integrated Core (Intel&reg M
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the example (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/common/copy_libraries.bat b/examples/common/copy_libraries.bat
index 561b966..8e924d5 100644
--- a/examples/common/copy_libraries.bat
+++ b/examples/common/copy_libraries.bat
@@ -28,9 +28,9 @@ REM the GNU General Public License.
 REM
 
 :: Getting parameters
-if ("%1") == ("") goto error
-if ("%2") == ("") goto error
-if ("%3") == ("") goto error
+if ("%1") == ("") goto error0
+if ("%2") == ("") goto error0
+if ("%3") == ("") goto error0
 set arch=%1
 if ("%2") == ("debug") set postfix=_debug
 set output_dir=%3
@@ -41,18 +41,19 @@ if ("%4") NEQ ("") set TBBROOT=%4
 if ("%TBBROOT%") == ("") set TBBROOT=%~d0%~p0..\..\
 
 :: Getting vs folders in case vc_mt binaries are not provided
+:: ordered from oldest to newest, so we end with newest available version
 if ("%VS90COMNTOOLS%")  NEQ ("") set vc_dir=vc9
 if ("%VS100COMNTOOLS%") NEQ ("") set vc_dir=vc10
 if ("%VS110COMNTOOLS%") NEQ ("") set vc_dir=vc11
 
 :: Are we standalone/oss or inside compiler?
-if exist "%TBBROOT%\bin\%arch%\vc9\tbb%postfix%.dll" set interim_path=bin\%arch%
-if exist "%TBBROOT%\..\redist\%arch%\tbb\vc9\tbb%postfix%.dll" set interim_path=..\redist\%arch%\tbb
-if ("%interim_path%") == ("") goto error
+if exist "%TBBROOT%\bin\%arch%\%vc_dir%\tbb%postfix%.dll" set interim_path=bin\%arch%
+if exist "%TBBROOT%\..\redist\%arch%\tbb\%vc_dir%\tbb%postfix%.dll" set interim_path=..\redist\%arch%\tbb
+if ("%interim_path%") == ("") goto error1
 
 :: Do we provide vc_mt binaries?
 if exist "%TBBROOT%\%interim_path%\vc_mt\tbb%postfix%.dll" set vc_dir=vc_mt
-if ("%vc_dir%") == ("") goto error
+if ("%vc_dir%") == ("") goto error2
 
 :: We know everything we wanted and there are no errors
 :: Copying binaries
@@ -65,8 +66,14 @@ if exist "%TBBROOT%\%interim_path%\%vc_dir%\tbb_preview%postfix%.dll" copy "%TBB
 if exist "%TBBROOT%\%interim_path%\%vc_dir%\tbb_preview%postfix%.pdb" copy "%TBBROOT%\%interim_path%\%vc_dir%\tbb_preview%postfix%.pdb" "%output_dir%"
 
 goto end
-:error
-echo Error occurred in libraries copying during post-build step.
+:error0
+echo number of parameters not correct
+exit /B 1
+:error1
+echo Could not determine path to TBB libraries
+exit /B 1
+:error2
+echo Could not determine Visual Studio version
 exit /B 1
 
 :end
diff --git a/examples/common/gui/Makefile.gmake b/examples/common/gui/Makefile.gmake
index e8c1e2f..3b61c98 100644
--- a/examples/common/gui/Makefile.gmake
+++ b/examples/common/gui/Makefile.gmake
@@ -70,13 +70,13 @@ MACUIOBJS = OpenGLView.o main.o tbbAppDelegate.o
 APPRES = $(NAME)$(SUFFIX).app/Contents/Resources
 EXE = $(NAME)$(SUFFIX).app/Contents/MacOS/$(NAME)$(SUFFIX)
 
-else # ! Mac
+else # ! OS X*
 UI = con
 EXE = $(NAME)$(SUFFIX)
 ifeq (file,$(origin UI))
 $(warning Note: no graphics output capability detected, building for console output.)
 endif
 
-endif # Mac
+endif # OS X*
 endif # X
 endif # Windows vs. other
diff --git a/examples/concurrent_hash_map/count_strings/index.html b/examples/concurrent_hash_map/count_strings/index.html
index fa97e58..478f03d 100644
--- a/examples/concurrent_hash_map/count_strings/index.html
+++ b/examples/concurrent_hash_map/count_strings/index.html
@@ -18,7 +18,7 @@ The example counts the number of unique words in a text.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the example (Windows* systems only). 
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only). 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only). 
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/concurrent_priority_queue/shortpath/index.html b/examples/concurrent_priority_queue/shortpath/index.html
index 26d5a1f..9c2e023 100644
--- a/examples/concurrent_priority_queue/shortpath/index.html
+++ b/examples/concurrent_priority_queue/shortpath/index.html
@@ -42,7 +42,7 @@ etc.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2008 workspace for building and running the example with the Intel® C++ compiler (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Mac OS* Xcode* workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains OS X* Xcode* workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/graph/binpack/index.html b/examples/graph/binpack/index.html
index 8e139f1..4060c13 100644
--- a/examples/graph/binpack/index.html
+++ b/examples/graph/binpack/index.html
@@ -31,7 +31,7 @@ summary of the quality of the bin-packing.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2008 workspace for building and running the example with the Intel® C++ compiler (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/graph/dining_philosophers/index.html b/examples/graph/dining_philosophers/index.html
index efa77d9..e58f0a7 100644
--- a/examples/graph/dining_philosophers/index.html
+++ b/examples/graph/dining_philosophers/index.html
@@ -22,7 +22,7 @@ to be available before eating.  Eating and thinking are implemented with sleep()
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* 
     systems only).</DL>
 
 <H2>To Build</H2>
diff --git a/examples/graph/logic_sim/index.html b/examples/graph/logic_sim/index.html
index 13411c2..29cd8ed 100644
--- a/examples/graph/logic_sim/index.html
+++ b/examples/graph/logic_sim/index.html
@@ -31,7 +31,7 @@ exemplifies the multifunction_node and the or_node CPF, among others.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2008 workspace for building and running the example with the Intel® C++ compiler (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/index.html b/examples/index.html
index ae7cc0c..b58d084 100644
--- a/examples/index.html
+++ b/examples/index.html
@@ -39,15 +39,15 @@ Build each example by using one of the following methods.  The specific directio
 method can be found below.
 <UL>
 <LI>Build by using a <A HREF=#build_1>Microsoft* Visual Studio* project (Windows* systems only)</A>.
-<LI>Build by using a <A HREF=#build_2>Xcode* IDE project (Mac OS* X systems only)</A>.
-<LI>Build by using a <A HREF=#build_3>Makefile (Windows*, Linux* or Mac OS* X systems)</A>.
+<LI>Build by using an <A HREF=#build_2>Xcode* IDE project (OS X* systems only)</A>.
+<LI>Build by using a <A HREF=#build_3>Makefile (Windows*, Linux* or OS X* systems)</A>.
 </UL>
 
 <P>
 Some of the following directions refer to a shell window; this refers
 to the command prompt environment/window normally used on your system.
 A shell might be a cmd.exe command prompt window (Windows* systems), or a
-sh, bash, csh, ksh, etc. (or compatible) shell window (Windows*, Linux* or Mac OS* X systems).
+sh, bash, csh, ksh, etc. (or compatible) shell window (Windows*, Linux* or OS X* systems).
 </P>
 
 <A name="build_1"><H4>To build by using a Microsoft* Visual Studio* project (Windows* systems):</H4></A>
@@ -72,7 +72,7 @@ Perform the following steps:
     set %TBBROOT% variable pointing to <installdir> folder.</li>
 </OL>
 
-<A name="build_2"><H4>To build by using a Xcode* IDE project (Mac OS* X systems):</H4></A>
+<A name="build_2"><H4>To build by using an Xcode* IDE project (OS X* systems):</H4></A>
 Perform the following steps:
 <OL>
 <LI>Identify the project (*.xcodeproj) file for the example you wish to build and run.
@@ -90,21 +90,21 @@ Perform the following steps:
 </LI>
 </OL>
 
-<A name="build_3"><H4>To build by using a Makefile (Windows*, Linux* or Mac OS* X systems):</H4></A>
+<A name="build_3"><H4>To build by using a Makefile (Windows*, Linux* or OS X* systems):</H4></A>
 Perform the following steps:
 <OL>
 <LI>Open a shell window.  For Windows* systems, make sure this shell window has the proper environment
     defined for use with Microsoft* Visual Studio* (2005, 2008, 2010 or 2012); such a shell can be invoked
     from the Start menu, under Visual Studio, Visual Studio Tools, Visual Studio Command Prompt.
 <LI>Set up the environment in this shell window for use with Intel TBB.
-    <BR>See below for how to set up the environment for <A href="#env_1">Windows*</A>, <A href="#env_23">Linux*</A> or <A href="#env_23">Mac OS* X</A> systems.
+    <BR>See below for how to set up the environment for <A href="#env_1">Windows*</A>, <A href="#env_23">Linux*</A> or <A href="#env_23">OS X*</A> systems.
 <LI>Unless you installed Intel TBB yourself, you may not have write permissions to the directory 
     containing the example.  In this case, make a copy of the example, and use the copy for the following steps.
 <LI>In the shell window, navigate to the directory for the example
     (or to the directory for the copy of the example if you made one in the previous step).
 <LI>Use one or more of the following commands to build and run the example.
     Here, make refers to the make command normally used on your system: this could be
-    nmake, gmake, or make on Windows* systems, or make or gmake on Linux* or Mac OS* X systems.
+    nmake, gmake, or make on Windows* systems, or make or gmake on Linux* or OS X* systems.
     <DL>
     <DT><TT>make</TT>
     <DD>Default build and run.  Equivalent to 'make release test'.
@@ -115,7 +115,7 @@ Perform the following steps:
     <DT><TT>make test</TT>
     <DD>Run an executable previously produced by one of the above commands.
     <DT><TT>make <B>[</B>(above options or targets)<B>]</B> CXX=<B>{</B>icl, icc<B>}</B></TT>
-    <DD>Build and run as above, but use Intel® compilers instead of default, native compilers (e.g., icl instead of cl.exe on Windows* systems, or icc instead of g++ on Linux* or Mac OS* X systems).
+    <DD>Build and run as above, but use Intel® compilers instead of default, native compilers (e.g., icl instead of cl.exe on Windows* systems, or icc instead of g++ on Linux* or OS X* systems).
     <DT><A NAME=build_4><TT>make <B>[</B>(above options or targets)<B>]</B> offload=mic</B></TT></A>
     <DD>Build and run the offload version of an example for Intel&reg Many Integrated Core (Intel&reg MIC) Architecture.
     <DD><I>Note: Only Intel&reg MIC Architecture with Linux* based host is currently supported.</I>
@@ -146,7 +146,7 @@ it may be set up, for a given type of shell window, by using one of the followin
 </pre>
 </DL>
 <A name="env_23">
-    <H4>To set up the environment (Linux* or Mac OS* X systems):</H4></A>
+    <H4>To set up the environment (Linux* or OS X* systems):</H4></A>
 The environment may be set up, for a given type of shell window, by using one of the following commands:
 <DL>
 <DT>For sh, bash, ksh (or compatibles):
diff --git a/examples/parallel_do/parallel_preorder/index.html b/examples/parallel_do/parallel_preorder/index.html
index b9a9e04..c27faf7 100644
--- a/examples/parallel_do/parallel_preorder/index.html
+++ b/examples/parallel_do/parallel_preorder/index.html
@@ -63,7 +63,7 @@ if the cell values are changed to type "float".  The reason is twofold.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the example (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/parallel_for/game_of_life/index.html b/examples/parallel_for/game_of_life/index.html
index 9ca1db9..3dc1d9f 100644
--- a/examples/parallel_for/game_of_life/index.html
+++ b/examples/parallel_for/game_of_life/index.html
@@ -28,7 +28,7 @@ The visualization is written in managed C++ and uses .NET CLR.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the example (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/parallel_for/polygon_overlay/Makefile b/examples/parallel_for/polygon_overlay/Makefile
index 3fb1db2..2333c1f 100644
--- a/examples/parallel_for/polygon_overlay/Makefile
+++ b/examples/parallel_for/polygon_overlay/Makefile
@@ -56,7 +56,7 @@ ifeq ($(UI),mac)
 	cp ../../common/gui/xcode/tbbExample/Info.plist $(NAME).app/Contents
 	cp ../../common/gui/xcode/tbbExample/PkgInfo $(NAME).app/Contents
 	cp ../../common/gui/xcode/tbbExample/en.lproj/* $(APPRES)/en.lproj
-endif # Mac
+endif # OS X*
 
 release: $(SRCFILES) resources
 ifeq ($(compiler),xl)
@@ -65,14 +65,14 @@ ifeq ($(compiler),xl)
 else
 ifeq ($(UI),mac)
 	$(CXX_UI) -O3 -DNDEBUG $(CXXFLAGS) -c $(MACUISOURCES)
-endif # Mac
+endif # OS X*
 	$(CXX) -O3 -DNDEBUG $(CXXFLAGS) -o $(EXE) $(SRCFILES) $(MACUIOBJS) -ltbb -ltbbmalloc $(LIBS)
 endif
 
 debug:  $(SRCFILES) resources
 ifeq ($(UI),mac)
 	$(CXX_UI) -g -O0 -DTBB_USE_DEBUG -D_DEBUG $(CXXFLAGS) -c $(MACUISOURCES)
-endif # Mac
+endif # OS X*
 	$(CXX) -g -O0 -DTBB_USE_DEBUG -D_DEBUG $(CXXFLAGS) -o $(EXE) $(SRCFILES) $(MACUIOBJS) -ltbb_debug -ltbbmalloc_debug $(LIBS)
 
 clean:
diff --git a/examples/parallel_for/polygon_overlay/index.html b/examples/parallel_for/polygon_overlay/index.html
index e26fa93..b48bda1 100644
--- a/examples/parallel_for/polygon_overlay/index.html
+++ b/examples/parallel_for/polygon_overlay/index.html
@@ -82,7 +82,7 @@ One limitation of the program is that if the number of polygons in the source ma
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* 
     systems only).</DL>
 
 <H2>To Build</H2>
diff --git a/examples/parallel_for/seismic/Makefile b/examples/parallel_for/seismic/Makefile
index 1b3ca10..f2f7dc9 100644
--- a/examples/parallel_for/seismic/Makefile
+++ b/examples/parallel_for/seismic/Makefile
@@ -72,7 +72,7 @@ ifeq ($(UI),mac)
 	cp ../../common/gui/xcode/tbbExample/Info.plist $(NAME).app/Contents
 	cp ../../common/gui/xcode/tbbExample/PkgInfo $(NAME).app/Contents
 	cp ../../common/gui/xcode/tbbExample/en.lproj/* $(APPRES)/en.lproj
-endif # Mac
+endif # OS X*
 
 release: resources
 ifeq ($(offload), mic)
@@ -80,7 +80,7 @@ ifeq ($(offload), mic)
 endif
 ifeq ($(UI),mac)
 	$(CXX_UI) -O2 -DNDEBUG $(CXXFLAGS) -c $(MACUISOURCES)
-endif # Mac
+endif # OS X*
 	$(CXX) -O2 -DNDEBUG $(CXXFLAGS) -o $(EXE) $(SOURCES) $(MACUIOBJS) $(TBBLIB) $(LIBS)
 
 debug: resources
@@ -89,7 +89,7 @@ ifeq ($(offload), mic)
 endif
 ifeq ($(UI),mac)
 	$(CXX_UI) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -c $(MACUISOURCES)
-endif # Mac
+endif # OS X*
 	$(CXX) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -o $(EXE) $(SOURCES) $(MACUIOBJS) $(TBBLIB_DEBUG) $(LIBS)
 
 clean:
diff --git a/examples/parallel_for/seismic/index.html b/examples/parallel_for/seismic/index.html
index cd6cbff..06b1a07 100644
--- a/examples/parallel_for/seismic/index.html
+++ b/examples/parallel_for/seismic/index.html
@@ -29,7 +29,7 @@ NOTE: Currently, the offload version does not support GUI and can only be used w
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* 
     systems only).</DL>
 
 <H2>To Build</H2>
@@ -41,7 +41,7 @@ The following additional options are supported:
     Direct2D*, X11, or OpenGL*
     (see the description of the <A HREF=../../common/index.html>common GUI code</A>
     for more information on available graphics support).
-    For Linux* and Mac OS* X systems, the best available driver is detected automatically by the Makefile.
+    For Linux* and OS X* systems, the best available driver is detected automatically by the Makefile.
     For Windows* systems, UI=gdi is the default GUI driver; compiling with UI=dd or 
     UI=d2d may offer superior
     performance, but can only be used if the Microsoft* DirectX* SDK is installed on your system 
diff --git a/examples/parallel_for/tachyon/Makefile b/examples/parallel_for/tachyon/Makefile
index 4d972bf..3ed788c 100644
--- a/examples/parallel_for/tachyon/Makefile
+++ b/examples/parallel_for/tachyon/Makefile
@@ -178,7 +178,7 @@ ifeq ($(UI),mac)
 	cp ../../common/gui/xcode/tbbExample/PkgInfo $(NAME)$(SUFFIX).app/Contents
 	cp ../../common/gui/xcode/tbbExample/en.lproj/* $(APPRES)/en.lproj
 	$(CXX_UI) $(MYCXXFLAGS) $(CXXFLAGS) -c $(MACUISOURCES)
-endif # Mac
+endif # OS X*
 	$(CXX) $(MYCXXFLAGS) -o $@ $(SOURCE) $(MACUIOBJS) $(LIBS)
 	$(RM) *.o
 
diff --git a/examples/parallel_for/tachyon/index.html b/examples/parallel_for/tachyon/index.html
index 2001bbc..878cf9a 100644
--- a/examples/parallel_for/tachyon/index.html
+++ b/examples/parallel_for/tachyon/index.html
@@ -114,7 +114,7 @@ Here, <<I>version</I>> is one of the above versions of the example, i.e.,
     Direct2D*, X11, or OpenGL*
 	(see the description of the <A HREF=../../common/index.html>common GUI code</A>
 	for more information on available graphics support).
-    For Linux* and Mac OS* X systems, the best available driver is detected automatically by the Makefile.
+    For Linux* and OS X* systems, the best available driver is detected automatically by the Makefile.
     For Windows* systems, UI=gdi is the default GUI driver; compiling with UI=dd or 
     UI=d2d may offer superior
 	performance, but can only be used if the Microsoft* DirectX* SDK is installed on your system.
diff --git a/examples/parallel_reduce/convex_hull/index.html b/examples/parallel_reduce/convex_hull/index.html
index 43a5264..33e26a5 100644
--- a/examples/parallel_reduce/convex_hull/index.html
+++ b/examples/parallel_reduce/convex_hull/index.html
@@ -21,7 +21,7 @@ Parallel version of convex hull algorithm (quick hull).
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* 
     systems only).</DL>
 
 <H2>To Build</H2>
diff --git a/examples/parallel_reduce/primes/index.html b/examples/parallel_reduce/primes/index.html
index 6fb6147..deb9b6c 100644
--- a/examples/parallel_reduce/primes/index.html
+++ b/examples/parallel_reduce/primes/index.html
@@ -24,7 +24,7 @@
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* 
     systems only).</DL>
 
 <H2>To Build</H2>
diff --git a/examples/pipeline/square/index.html b/examples/pipeline/square/index.html
index c78d606..a82c315 100644
--- a/examples/pipeline/square/index.html
+++ b/examples/pipeline/square/index.html
@@ -20,7 +20,7 @@ containing decimal integers in text format, and changes each to its square.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X*
     systems only).</DL>
 
 <H2>To Build</H2>
diff --git a/examples/task/tree_sum/index.html b/examples/task/tree_sum/index.html
index 7c868b5..7d847d3 100644
--- a/examples/task/tree_sum/index.html
+++ b/examples/task/tree_sum/index.html
@@ -38,7 +38,7 @@ In addition, the scalable_allocator performs better for multi-threaded allocatio
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X*
     systems only).</DL>
 
 <H2>To Build</H2>
diff --git a/examples/task_group/sudoku/index.html b/examples/task_group/sudoku/index.html
index e1aefe7..b492b78 100644
--- a/examples/task_group/sudoku/index.html
+++ b/examples/task_group/sudoku/index.html
@@ -37,7 +37,7 @@ how to use the task_group interface.
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2008 workspace for building and running the example with the Intel® C++ compiler (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/task_priority/fractal/Makefile b/examples/task_priority/fractal/Makefile
index c191602..59bb462 100644
--- a/examples/task_priority/fractal/Makefile
+++ b/examples/task_priority/fractal/Makefile
@@ -53,18 +53,18 @@ ifeq ($(UI),mac)
 	cp ../../common/gui/xcode/tbbExample/Info.plist $(NAME).app/Contents
 	cp ../../common/gui/xcode/tbbExample/PkgInfo $(NAME).app/Contents
 	cp ../../common/gui/xcode/tbbExample/en.lproj/* $(APPRES)/en.lproj
-endif # Mac
+endif # OS X*
 
 release: $(SOURCES) resources
 ifeq ($(UI),mac)
 	$(CXX_UI) -O2 -DNDEBUG $(CXXFLAGS) -c $(MACUISOURCES)
-endif # Mac
+endif # OS X*
 	$(CXX) -O2 -DNDEBUG $(CXXFLAGS) -o $(EXE) $(SOURCES) $(MACUIOBJS) -ltbb $(LIBS)
 
 debug: resources
 ifeq ($(UI),mac)
 	$(CXX_UI) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -c $(MACUISOURCES)
-endif # Mac
+endif # OS X*
 	$(CXX) -g -O0 -DTBB_USE_DEBUG $(CXXFLAGS) -o $(EXE) $(SOURCES) $(MACUIOBJS) -ltbb_debug $(LIBS)
 
 clean:
diff --git a/examples/task_priority/fractal/index.html b/examples/task_priority/fractal/index.html
index 5357f3c..ce60c08 100644
--- a/examples/task_priority/fractal/index.html
+++ b/examples/task_priority/fractal/index.html
@@ -26,7 +26,7 @@ The example also has the console mode but in this mode the priorities could not
 <DT><A HREF="msvs">msvs</A>
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the example (Windows* systems only).
 <DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X systems only).
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* systems only).
 </DL>
 
 <H2>To Build</H2>
diff --git a/examples/test_all/fibonacci/index.html b/examples/test_all/fibonacci/index.html
index 5d43170..fad590a 100644
--- a/examples/test_all/fibonacci/index.html
+++ b/examples/test_all/fibonacci/index.html
@@ -20,7 +20,7 @@ show any speedup on multiprocessors.
 <DT><A HREF="msvs">msvs</A> 
 <DD>Contains Microsoft* Visual Studio* 2005 workspace for building and running the 
     example (Windows* systems only).<DT><A HREF="xcode">xcode</A>
-<DD>Contains Xcode* IDE workspace for building and running the example (Mac OS* X 
+<DD>Contains Xcode* IDE workspace for building and running the example (OS X* 
     systems only).</DL>
 
 <H2>To Build</H2>
diff --git a/include/tbb/compat/tuple b/include/tbb/compat/tuple
index 5e79b3f..f61e637 100644
--- a/include/tbb/compat/tuple
+++ b/include/tbb/compat/tuple
@@ -46,57 +46,57 @@
 #define __TBB_CONST_NULL_REF_PACK
 //
 #elif __TBB_VARIADIC_MAX == 6
-#define __TBB_T_PACK ,T5
-#define __TBB_U_PACK ,U5
-#define __TBB_TYPENAME_T_PACK , typename T5
-#define __TBB_TYPENAME_U_PACK , typename U5
+#define __TBB_T_PACK ,__T5
+#define __TBB_U_PACK ,__U5
+#define __TBB_TYPENAME_T_PACK , typename __T5
+#define __TBB_TYPENAME_U_PACK , typename __U5
 #define __TBB_NULL_TYPE_PACK , null_type
-#define __TBB_REF_T_PARAM_PACK ,T5& t5
-#define __TBB_CONST_REF_T_PARAM_PACK ,const T5& t5
+#define __TBB_REF_T_PARAM_PACK ,__T5& t5
+#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5
 #define __TBB_T_PARAM_LIST_PACK ,t5
 #define __TBB_CONST_NULL_REF_PACK , const null_type&
 //
 #elif __TBB_VARIADIC_MAX == 7
-#define __TBB_T_PACK ,T5, T6
-#define __TBB_U_PACK ,U5, U6
-#define __TBB_TYPENAME_T_PACK , typename T5 , typename T6
-#define __TBB_TYPENAME_U_PACK , typename U5 , typename U6
+#define __TBB_T_PACK ,__T5, __T6
+#define __TBB_U_PACK ,__U5, __U6
+#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6
+#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6
 #define __TBB_NULL_TYPE_PACK , null_type, null_type
-#define __TBB_REF_T_PARAM_PACK ,T5& t5, T6& t6
-#define __TBB_CONST_REF_T_PARAM_PACK ,const T5& t5, const T6& t6
+#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6
+#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5, const __T6& t6
 #define __TBB_T_PARAM_LIST_PACK ,t5 ,t6
 #define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&
 //
 #elif __TBB_VARIADIC_MAX == 8
-#define __TBB_T_PACK ,T5, T6, T7
-#define __TBB_U_PACK ,U5, U6, U7
-#define __TBB_TYPENAME_T_PACK , typename T5 , typename T6, typename T7
-#define __TBB_TYPENAME_U_PACK , typename U5 , typename U6, typename U7
+#define __TBB_T_PACK ,__T5, __T6, __T7
+#define __TBB_U_PACK ,__U5, __U6, __U7
+#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6, typename __T7
+#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6, typename __U7
 #define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type
-#define __TBB_REF_T_PARAM_PACK ,T5& t5, T6& t6, T7& t7
-#define __TBB_CONST_REF_T_PARAM_PACK , const T5& t5, const T6& t6, const T7& t7
+#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7
+#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7
 #define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7
 #define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&
 //
 #elif __TBB_VARIADIC_MAX == 9
-#define __TBB_T_PACK ,T5, T6, T7, T8
-#define __TBB_U_PACK ,U5, U6, U7, U8
-#define __TBB_TYPENAME_T_PACK , typename T5, typename T6, typename T7, typename T8
-#define __TBB_TYPENAME_U_PACK , typename U5, typename U6, typename U7, typename U8
+#define __TBB_T_PACK ,__T5, __T6, __T7, __T8
+#define __TBB_U_PACK ,__U5, __U6, __U7, __U8
+#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8
+#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8
 #define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type
-#define __TBB_REF_T_PARAM_PACK ,T5& t5, T6& t6, T7& t7, T8& t8
-#define __TBB_CONST_REF_T_PARAM_PACK , const T5& t5, const T6& t6, const T7& t7, const T8& t8
+#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8
+#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8
 #define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8
 #define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type&
 //
 #elif __TBB_VARIADIC_MAX >= 10
-#define __TBB_T_PACK ,T5, T6, T7, T8, T9
-#define __TBB_U_PACK ,U5, U6, U7, U8, U9
-#define __TBB_TYPENAME_T_PACK , typename T5, typename T6, typename T7, typename T8, typename T9
-#define __TBB_TYPENAME_U_PACK , typename U5, typename U6, typename U7, typename U8, typename U9
+#define __TBB_T_PACK ,__T5, __T6, __T7, __T8, __T9
+#define __TBB_U_PACK ,__U5, __U6, __U7, __U8, __U9
+#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8, typename __T9
+#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8, typename __U9
 #define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type, null_type
-#define __TBB_REF_T_PARAM_PACK ,T5& t5, T6& t6, T7& t7, T8& t8, T9& t9
-#define __TBB_CONST_REF_T_PARAM_PACK , const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9
+#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8, __T9& t9
+#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8, const __T9& t9
 #define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8 ,t9
 #define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type&, const null_type&
 #endif
@@ -112,18 +112,18 @@ struct null_type { };
 using internal::null_type;
 
 // tuple forward declaration
-template <typename T0=null_type, typename T1=null_type, typename T2=null_type,
-          typename T3=null_type, typename T4=null_type
+template <typename __T0=null_type, typename __T1=null_type, typename __T2=null_type,
+          typename __T3=null_type, typename __T4=null_type
 #if __TBB_VARIADIC_MAX >= 6
-, typename T5=null_type
+, typename __T5=null_type
 #if __TBB_VARIADIC_MAX >= 7
-, typename T6=null_type
+, typename __T6=null_type
 #if __TBB_VARIADIC_MAX >= 8
-, typename T7=null_type
+, typename __T7=null_type
 #if __TBB_VARIADIC_MAX >= 9
-, typename T8=null_type
+, typename __T8=null_type
 #if __TBB_VARIADIC_MAX >= 10
-, typename T9=null_type
+, typename __T9=null_type
 #endif
 #endif
 #endif
@@ -138,18 +138,18 @@ namespace internal {
 inline const null_type cnull() { return null_type(); }
 
 // cons forward declaration
-template <typename HT, typename TT> struct cons;
+template <typename __HT, typename __TT> struct cons;
 
 // type of a component of the cons
-template<int N, typename T>
+template<int __N, typename __T>
 struct component {
-    typedef typename T::tail_type next;
-    typedef typename component<N-1,next>::type type;
+    typedef typename __T::tail_type next;
+    typedef typename component<__N-1,next>::type type;
 };
 
-template<typename T>
-struct component<0,T> {
-    typedef typename T::head_type type;
+template<typename __T>
+struct component<0,__T> {
+    typedef typename __T::head_type type;
 };
 
 template<>
@@ -159,54 +159,54 @@ struct component<0,null_type> {
 
 // const version of component
 
-template<int N, typename T>
-struct component<N, const T>
+template<int __N, typename __T>
+struct component<__N, const __T>
 {
-    typedef typename T::tail_type next;
-    typedef const typename component<N-1,next>::type type;
+    typedef typename __T::tail_type next;
+    typedef const typename component<__N-1,next>::type type;
 };
 
-template<typename T>
-struct component<0, const T>
+template<typename __T>
+struct component<0, const __T>
 {
-    typedef const typename T::head_type type;
+    typedef const typename __T::head_type type;
 };
 
 
 // helper class for getting components of cons
-template< int N>
+template< int __N>
 struct get_helper {
-template<typename HT, typename TT>
-inline static typename component<N, cons<HT,TT> >::type& get(cons<HT,TT>& ti) {
-    return get_helper<N-1>::get(ti.tail);
+template<typename __HT, typename __TT>
+inline static typename component<__N, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) {
+    return get_helper<__N-1>::get(ti.tail);
 }
-template<typename HT, typename TT>
-inline static typename component<N, cons<HT,TT> >::type const& get(const cons<HT,TT>& ti) {
-    return get_helper<N-1>::get(ti.tail);
+template<typename __HT, typename __TT>
+inline static typename component<__N, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) {
+    return get_helper<__N-1>::get(ti.tail);
 }
 };
 
 template<>
 struct get_helper<0> {
-template<typename HT, typename TT>
-inline static typename component<0, cons<HT,TT> >::type& get(cons<HT,TT>& ti) {
+template<typename __HT, typename __TT>
+inline static typename component<0, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) {
     return ti.head;
 }
-template<typename HT, typename TT>
-inline static typename component<0, cons<HT,TT> >::type const& get(const cons<HT,TT>& ti) {
+template<typename __HT, typename __TT>
+inline static typename component<0, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) {
     return ti.head;
 }
 };
 
 // traits adaptor
-template <typename T0, typename T1, typename T2, typename T3, typename T4 __TBB_TYPENAME_T_PACK>
+template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK>
 struct tuple_traits {
-    typedef cons <T0, typename tuple_traits<T1, T2, T3, T4 __TBB_T_PACK , null_type>::U > U;
+    typedef cons <__T0, typename tuple_traits<__T1, __T2, __T3, __T4 __TBB_T_PACK , null_type>::U > U;
 };
 
-template <typename T0>
-struct tuple_traits<T0, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > {
-    typedef cons<T0, null_type> U;
+template <typename __T0>
+struct tuple_traits<__T0, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > {
+    typedef cons<__T0, null_type> U;
 };
 
 template<>
@@ -216,11 +216,11 @@ struct tuple_traits<null_type, null_type, null_type, null_type, null_type __TBB_
 
 
 // core cons defs
-template <typename HT, typename TT>
+template <typename __HT, typename __TT>
 struct cons{
 
-    typedef HT head_type;
-    typedef TT tail_type;
+    typedef __HT head_type;
+    typedef __TT tail_type;
 
     head_type head; 
     tail_type tail;
@@ -233,16 +233,16 @@ struct cons{
     // non-default constructors
     cons(head_type& h, const tail_type& t) : head(h), tail(t) { }
 
-    template <typename T0, typename T1, typename T2, typename T3, typename T4 __TBB_TYPENAME_T_PACK >
-    cons(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4 __TBB_CONST_REF_T_PARAM_PACK) :
+    template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
+    cons(const __T0& t0, const __T1& t1, const __T2& t2, const __T3& t3, const __T4& t4 __TBB_CONST_REF_T_PARAM_PACK) :
         head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK, cnull()) { }
 
-    template <typename T0, typename T1, typename T2, typename T3, typename T4 __TBB_TYPENAME_T_PACK >
-    cons(T0& t0, T1& t1, T2& t2, T3& t3, T4& t4 __TBB_REF_T_PARAM_PACK) :
+    template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
+    cons(__T0& t0, __T1& t1, __T2& t2, __T3& t3, __T4& t4 __TBB_REF_T_PARAM_PACK) :
         head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK , cnull()) { }
 
-    template <typename HT1, typename TT1>
-    cons(const cons<HT1,TT1>& other) : head(other.head), tail(other.tail) { }
+    template <typename __HT1, typename __TT1>
+    cons(const cons<__HT1,__TT1>& other) : head(other.head), tail(other.tail) { }
 
     cons& operator=(const cons& other) { head = other.head; tail = other.tail; return *this; }
 
@@ -257,36 +257,36 @@ struct cons{
     friend bool operator>=(const cons& me, const cons& other) { return !(me<other); }
     friend bool operator<=(const cons& me, const cons& other) { return !(me>other); }
 
-    template<typename HT1, typename TT1>
-    friend bool operator==(const cons<HT,TT>& me, const cons<HT1,TT1>& other) {
+    template<typename __HT1, typename __TT1>
+    friend bool operator==(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) {
         return me.head == other.head && me.tail == other.tail;
     }
 
-    template<typename HT1, typename TT1>
-    friend bool operator<(const cons<HT,TT>& me, const cons<HT1,TT1>& other) {
+    template<typename __HT1, typename __TT1>
+    friend bool operator<(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) {
         return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail);
     }
 
-    template<typename HT1, typename TT1>
-    friend bool operator>(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return other<me; }
+    template<typename __HT1, typename __TT1>
+    friend bool operator>(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return other<me; }
 
-    template<typename HT1, typename TT1>
-    friend bool operator!=(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return !(me==other); }
+    template<typename __HT1, typename __TT1>
+    friend bool operator!=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me==other); }
 
-    template<typename HT1, typename TT1>
-    friend bool operator>=(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return !(me<other); }
+    template<typename __HT1, typename __TT1>
+    friend bool operator>=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me<other); }
 
-    template<typename HT1, typename TT1>
-    friend bool operator<=(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return !(me>other); }
+    template<typename __HT1, typename __TT1>
+    friend bool operator<=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me>other); }
 
 
 };  // cons
 
 
-template <typename HT>
-struct cons<HT,null_type> { 
+template <typename __HT>
+struct cons<__HT,null_type> { 
 
-    typedef HT head_type;
+    typedef __HT head_type;
     typedef null_type tail_type;
 
     head_type head; 
@@ -299,19 +299,19 @@ struct cons<HT,null_type> {
     cons(const null_type&, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head() { /*std::cout << "default constructor 2\n";*/ }
 
     // non-default constructor
-    template<typename T1>
-    cons(T1& t1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t1) { /*std::cout << "non-default a1, t1== " << t1 << "\n";*/}
+    template<typename __T1>
+    cons(__T1& t1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t1) { /*std::cout << "non-default a1, t1== " << t1 << "\n";*/}
 
     cons(head_type& h, const null_type& = null_type() ) : head(h) { }
     cons(const head_type& t0, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t0) { }
 
     // converting constructor
-    template<typename HT1>
-    cons(HT1 h1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(h1) { }
+    template<typename __HT1>
+    cons(__HT1 h1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(h1) { }
 
     // copy constructor
-    template<typename HT1>
-    cons( const cons<HT1, null_type>& other) : head(other.head) { }
+    template<typename __HT1>
+    cons( const cons<__HT1, null_type>& other) : head(other.head) { }
 
     // assignment operator
     cons& operator=(const cons& other) { head = other.head; return *this; }
@@ -323,27 +323,27 @@ struct cons<HT,null_type> {
     friend bool operator<=(const cons& me, const cons& other) {return !(me>other); }
     friend bool operator>=(const cons& me, const cons& other) {return !(me<other); }
 
-    template<typename HT1>
-    friend bool operator==(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) {
+    template<typename __HT1>
+    friend bool operator==(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) {
         return me.head == other.head;
     }
 
-    template<typename HT1>
-    friend bool operator<(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) {
+    template<typename __HT1>
+    friend bool operator<(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) {
         return me.head < other.head;
     }
 
-    template<typename HT1>
-    friend bool operator>(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return other<me; }
+    template<typename __HT1>
+    friend bool operator>(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return other<me; }
 
-    template<typename HT1>
-    friend bool operator!=(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return !(me==other); }
+    template<typename __HT1>
+    friend bool operator!=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me==other); }
 
-    template<typename HT1>
-    friend bool operator<=(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return !(me>other); }
+    template<typename __HT1>
+    friend bool operator<=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me>other); }
 
-    template<typename HT1>
-    friend bool operator>=(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return !(me<other); }
+    template<typename __HT1>
+    friend bool operator>=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me<other); }
 
 };  // cons
 
@@ -351,44 +351,44 @@ template <>
 struct cons<null_type,null_type> { typedef null_type tail_type; static const int length = 0; };
 
 // wrapper for default constructor
-template<typename T>
-inline const T wrap_dcons(T*) { return T(); }
+template<typename __T>
+inline const __T wrap_dcons(__T*) { return __T(); }
 
 } // namespace internal
 
 // tuple definition
-template<typename T0, typename T1, typename T2, typename T3, typename T4 __TBB_TYPENAME_T_PACK >
-class tuple : public internal::tuple_traits<T0, T1, T2, T3, T4 __TBB_T_PACK >::U {
+template<typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
+class tuple : public internal::tuple_traits<__T0, __T1, __T2, __T3, __T4 __TBB_T_PACK >::U {
     // friends
-    template <typename T> friend class tuple_size;
-    template<int N, typename T> friend struct tuple_element;
+    template <typename __T> friend class tuple_size;
+    template<int __N, typename __T> friend struct tuple_element;
 
     // stl components
-    typedef tuple<T0,T1,T2,T3,T4 __TBB_T_PACK > value_type;
+    typedef tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > value_type;
     typedef value_type *pointer;
     typedef const value_type *const_pointer;
     typedef value_type &reference;
     typedef const value_type &const_reference;
     typedef size_t size_type;
 
-    typedef typename internal::tuple_traits<T0,T1,T2,T3, T4 __TBB_T_PACK >::U my_cons;
+    typedef typename internal::tuple_traits<__T0,__T1,__T2,__T3, __T4 __TBB_T_PACK >::U my_cons;
 
 public:
-    tuple(const T0& t0=internal::wrap_dcons((T0*)NULL)
-          ,const T1& t1=internal::wrap_dcons((T1*)NULL)
-          ,const T2& t2=internal::wrap_dcons((T2*)NULL)
-          ,const T3& t3=internal::wrap_dcons((T3*)NULL)
-          ,const T4& t4=internal::wrap_dcons((T4*)NULL)
+    tuple(const __T0& t0=internal::wrap_dcons((__T0*)NULL)
+          ,const __T1& t1=internal::wrap_dcons((__T1*)NULL)
+          ,const __T2& t2=internal::wrap_dcons((__T2*)NULL)
+          ,const __T3& t3=internal::wrap_dcons((__T3*)NULL)
+          ,const __T4& t4=internal::wrap_dcons((__T4*)NULL)
 #if __TBB_VARIADIC_MAX >= 6
-          ,const T5& t5=internal::wrap_dcons((T5*)NULL)
+          ,const __T5& t5=internal::wrap_dcons((__T5*)NULL)
 #if __TBB_VARIADIC_MAX >= 7
-          ,const T6& t6=internal::wrap_dcons((T6*)NULL)
+          ,const __T6& t6=internal::wrap_dcons((__T6*)NULL)
 #if __TBB_VARIADIC_MAX >= 8
-          ,const T7& t7=internal::wrap_dcons((T7*)NULL)
+          ,const __T7& t7=internal::wrap_dcons((__T7*)NULL)
 #if __TBB_VARIADIC_MAX >= 9
-          ,const T8& t8=internal::wrap_dcons((T8*)NULL)
+          ,const __T8& t8=internal::wrap_dcons((__T8*)NULL)
 #if __TBB_VARIADIC_MAX >= 10
-          ,const T9& t9=internal::wrap_dcons((T9*)NULL)
+          ,const __T9& t9=internal::wrap_dcons((__T9*)NULL)
 #endif
 #endif
 #endif
@@ -397,25 +397,25 @@ public:
           ) :
         my_cons(t0,t1,t2,t3,t4 __TBB_T_PARAM_LIST_PACK) { }
 
-    template<int N>
+    template<int __N>
     struct internal_tuple_element {
-        typedef typename internal::component<N,my_cons>::type type;
+        typedef typename internal::component<__N,my_cons>::type type;
     };
 
-    template<int N>
-    typename internal_tuple_element<N>::type& get() { return internal::get_helper<N>::get(*this); }
+    template<int __N>
+    typename internal_tuple_element<__N>::type& get() { return internal::get_helper<__N>::get(*this); }
 
-    template<int N>
-    typename internal_tuple_element<N>::type const& get() const { return internal::get_helper<N>::get(*this); }
+    template<int __N>
+    typename internal_tuple_element<__N>::type const& get() const { return internal::get_helper<__N>::get(*this); }
 
-    template<typename U1, typename U2>
-    tuple& operator=(const internal::cons<U1,U2>& other) {
+    template<typename __U1, typename __U2>
+    tuple& operator=(const internal::cons<__U1,__U2>& other) {
         my_cons::operator=(other);
         return *this;
     }
 
-    template<typename U1, typename U2>
-    tuple& operator=(const std::pair<U1,U2>& other) {
+    template<typename __U1, typename __U2>
+    tuple& operator=(const std::pair<__U1,__U2>& other) {
         // __TBB_ASSERT(tuple_size<value_type>::value == 2, "Invalid size for pair to tuple assignment");
         this->head = other.first;
         this->tail.head = other.second;
@@ -438,10 +438,10 @@ class tuple<null_type, null_type, null_type, null_type, null_type __TBB_NULL_TYP
 
 // helper classes
 
-template < typename T>
+template < typename __T>
 class tuple_size {
 public:
-    static const size_t value = 1 + tuple_size<typename T::tail_type>::value;
+    static const size_t value = 1 + tuple_size<typename __T::tail_type>::value;
 };
 
 template <>
@@ -456,18 +456,18 @@ public:
     static const size_t value = 0;
 };
 
-template<int N, typename T>
+template<int __N, typename __T>
 struct tuple_element {
-    typedef typename internal::component<N, typename T::my_cons>::type type;
+    typedef typename internal::component<__N, typename __T::my_cons>::type type;
 };
 
-template<int N, typename T0, typename T1, typename T2, typename T3, typename T4 __TBB_TYPENAME_T_PACK >
-inline static typename tuple_element<N,tuple<T0,T1,T2,T3,T4 __TBB_T_PACK > >::type&
-    get(tuple<T0,T1,T2,T3,T4 __TBB_T_PACK >& t) { return internal::get_helper<N>::get(t); }
+template<int __N, typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
+inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type&
+    get(tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); }
 
-template<int N, typename T0, typename T1, typename T2, typename T3, typename T4 __TBB_TYPENAME_T_PACK >
-inline static typename tuple_element<N,tuple<T0,T1,T2,T3,T4 __TBB_T_PACK > >::type const&
-    get(const tuple<T0,T1,T2,T3,T4 __TBB_T_PACK >& t) { return internal::get_helper<N>::get(t); }
+template<int __N, typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
+inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type const&
+    get(const tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); }
 
 }  // interface5
 } // tbb
diff --git a/include/tbb/concurrent_hash_map.h b/include/tbb/concurrent_hash_map.h
index c130faf..c5e7946 100644
--- a/include/tbb/concurrent_hash_map.h
+++ b/include/tbb/concurrent_hash_map.h
@@ -1006,10 +1006,9 @@ bool concurrent_hash_map<Key,T,HashCompare,A>::lookup( bool op_insert, const Key
         // TODO: the following seems as generic/regular operation
         // acquire the item
         if( !result->try_acquire( n->mutex, write ) ) {
-            // we are unlucky, prepare for longer wait
-            tbb::internal::atomic_backoff trials;
-            do {
-                if( !trials.bounded_pause() ) {
+            for( tbb::internal::atomic_backoff backoff(true);; ) {
+                if( result->try_acquire( n->mutex, write ) ) break;
+                if( !backoff.bounded_pause() ) {
                     // the wait takes really long, restart the operation
                     b.release();
                     __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" );
@@ -1017,7 +1016,7 @@ bool concurrent_hash_map<Key,T,HashCompare,A>::lookup( bool op_insert, const Key
                     m = (hashcode_t) itt_load_word_with_acquire( my_mask );
                     goto restart;
                 }
-            } while( !result->try_acquire( n->mutex, write ) );
+            }
         }
     }//lock scope
     result->my_node = n;
diff --git a/include/tbb/concurrent_priority_queue.h b/include/tbb/concurrent_priority_queue.h
index cc7668d..f368bb9 100644
--- a/include/tbb/concurrent_priority_queue.h
+++ b/include/tbb/concurrent_priority_queue.h
@@ -39,6 +39,10 @@
 #include <iterator>
 #include <functional>
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    #include <initializer_list>
+#endif
+
 namespace tbb {
 namespace interface5 {
 
@@ -83,14 +87,24 @@ class concurrent_priority_queue {
     //! [begin,end) constructor
     template<typename InputIterator>
     concurrent_priority_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
-        data(begin, end, a)
+        mark(0), data(begin, end, a)
     {
-        mark = 0;
         my_aggregator.initialize_handler(my_functor_t(this));
         heapify();
         my_size = data.size();
     }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Constructor from std::initializer_list
+    concurrent_priority_queue(std::initializer_list<T> const& init_list, const allocator_type &a = allocator_type()) :
+        mark(0),data(init_list.begin(), init_list.end(), a)
+    {
+        my_aggregator.initialize_handler(my_functor_t(this));
+        heapify();
+        my_size = data.size();
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     //! Copy constructor
     /** This operation is unsafe if there are pending concurrent operations on the src queue. */
     explicit concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
@@ -120,6 +134,26 @@ class concurrent_priority_queue {
         return *this;
     }
 
+    //! Assign the queue from [begin,end) range, not thread-safe
+    template<typename InputIterator>
+    void assign(InputIterator begin, InputIterator end) {
+        std::vector<value_type, allocator_type>(begin, end, data.get_allocator()).swap(data);
+        mark = 0;
+        my_size = data.size();
+        heapify();
+    }
+
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    //! Assign the queue from std::initializer_list, not thread-safe
+    void assign(std::initializer_list<T> const& il) { this->assign(il.begin(), il.end()); }
+
+    //! Assign from std::initializer_list, not thread-safe
+    concurrent_priority_queue& operator=(std::initializer_list<T> const& il) {
+        this->assign(il.begin(), il.end());
+        return *this;
+    }
+#endif //# __TBB_INITIALIZER_LISTS_PRESENT
+
     //! Returns true if empty, false otherwise
     /** Returned value may not reflect results of pending operations.
         This operation reads shared data and will trigger a race condition. */
diff --git a/include/tbb/flow_graph.h b/include/tbb/flow_graph.h
index 4a68e24..cd0550c 100644
--- a/include/tbb/flow_graph.h
+++ b/include/tbb/flow_graph.h
@@ -1958,33 +1958,44 @@ private:
 public:
     typedef OutputTuple output_type;
     typedef typename unfolded_type::input_ports_type input_ports_type;
-    template<typename B0, typename B1>
-    join_node(graph &g, B0 b0, B1 b1) : unfolded_type(g, b0, b1) { }
-    template<typename B0, typename B1, typename B2>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2) : unfolded_type(g, b0, b1, b2) { }
-    template<typename B0, typename B1, typename B2, typename B3>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2, B3 b3) : unfolded_type(g, b0, b1, b2, b3) { }
-    template<typename B0, typename B1, typename B2, typename B3, typename B4>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2, B3 b3, B4 b4) : unfolded_type(g, b0, b1, b2, b3, b4) { }
+    template<typename __TBB_B0, typename __TBB_B1>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) :
+            unfolded_type(g, b0, b1, b2, b3, b4) { }
 #if __TBB_VARIADIC_MAX >= 6
-    template<typename B0, typename B1, typename B2, typename B3, typename B4, typename B5>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2, B3 b3, B4 b4, B5 b5) : unfolded_type(g, b0, b1, b2, b3, b4, b5) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4,
+        typename __TBB_B5>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) :
+            unfolded_type(g, b0, b1, b2, b3, b4, b5) { }
 #endif
 #if __TBB_VARIADIC_MAX >= 7
-    template<typename B0, typename B1, typename B2, typename B3, typename B4, typename B5, typename B6>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2, B3 b3, B4 b4, B5 b5, B6 b6) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4,
+        typename __TBB_B5, typename __TBB_B6>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) :
+            unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { }
 #endif
 #if __TBB_VARIADIC_MAX >= 8
-    template<typename B0, typename B1, typename B2, typename B3, typename B4, typename B5, typename B6, typename B7>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2, B3 b3, B4 b4, B5 b5, B6 b6, B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4,
+        typename __TBB_B5, typename __TBB_B6, typename __TBB_B7>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6,
+            __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { }
 #endif
 #if __TBB_VARIADIC_MAX >= 9
-    template<typename B0, typename B1, typename B2, typename B3, typename B4, typename B5, typename B6, typename B7, typename B8>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2, B3 b3, B4 b4, B5 b5, B6 b6, B7 b7, B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4,
+        typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6,
+            __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { }
 #endif
 #if __TBB_VARIADIC_MAX >= 10
-    template<typename B0, typename B1, typename B2, typename B3, typename B4, typename B5, typename B6, typename B7, typename B8, typename B9>
-    join_node(graph &g, B0 b0, B1 b1, B2 b2, B3 b3, B4 b4, B5 b5, B6 b6, B7 b7, B8 b8, B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { }
+    template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4,
+        typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8, typename __TBB_B9>
+    join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6,
+            __TBB_B7 b7, __TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { }
 #endif
     join_node(const join_node &other) : unfolded_type(other) {}
 };
diff --git a/include/tbb/internal/_concurrent_queue_impl.h b/include/tbb/internal/_concurrent_queue_impl.h
index 8058102..a461e62 100644
--- a/include/tbb/internal/_concurrent_queue_impl.h
+++ b/include/tbb/internal/_concurrent_queue_impl.h
@@ -216,14 +216,14 @@ public:
 
 template<typename T>
 void micro_queue<T>::spin_wait_until_my_turn( atomic<ticket>& counter, ticket k, concurrent_queue_rep_base& rb ) const {
-    atomic_backoff backoff;
-    do {
-        backoff.pause();
-        if( counter&1 ) {
+    for( atomic_backoff b(true);;b.pause() ) {
+        ticket c = counter;
+        if( c==k ) return;
+        else if( c&1 ) {
             ++rb.n_invalid_entries;
             throw_exception( eid_bad_last_alloc );
         }
-    } while( counter!=k ) ;
+    }
 }
 
 template<typename T>
diff --git a/include/tbb/internal/_flow_graph_node_impl.h b/include/tbb/internal/_flow_graph_node_impl.h
index 4a36fc9..4467ebf 100644
--- a/include/tbb/internal/_flow_graph_node_impl.h
+++ b/include/tbb/internal/_flow_graph_node_impl.h
@@ -484,6 +484,10 @@ namespace internal {
         continue_input( const continue_input& src ) : continue_receiver(src), 
             my_root_task(src.my_root_task), my_body( src.my_body->clone() ) {}
 
+        ~continue_input() {
+            delete my_body;
+        }
+
         template< typename Body >
         Body copy_function_object() {
             internal::function_body<input_type, output_type> &body_ref = *my_body;
diff --git a/include/tbb/machine/linux_ia32.h b/include/tbb/machine/linux_ia32.h
index ff2f5b4..9d7f7eb 100644
--- a/include/tbb/machine/linux_ia32.h
+++ b/include/tbb/machine/linux_ia32.h
@@ -158,7 +158,7 @@ static inline void __TBB_machine_and( volatile void *ptr, uint32_t addend ) {
     __asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
 }
 
-//TODO: Check if it possible and profitable for IA-32 on (Linux and Windows)
+//TODO: Check if it possible and profitable for IA-32 architecture on (Linux* and Windows*)
 //to use of 64-bit load/store via floating point registers together with full fence
 //for sequentially consistent load/store, instead of CAS.
 
diff --git a/include/tbb/machine/macos_common.h b/include/tbb/machine/macos_common.h
index b66b213..f7c36f5 100644
--- a/include/tbb/machine/macos_common.h
+++ b/include/tbb/machine/macos_common.h
@@ -62,7 +62,7 @@ static inline int __TBB_macos_available_cpu() {
 
 static inline int64_t __TBB_machine_cmpswp8_OsX(volatile void *ptr, int64_t value, int64_t comparand)
 {
-    __TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for Mac OS atomics");
+    __TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
     int64_t* address = (int64_t*)ptr;
     while( !OSAtomicCompareAndSwap64Barrier(comparand, value, address) ){
 #if __TBB_WORDSIZE==8
@@ -107,7 +107,7 @@ static inline int64_t __TBB_machine_cmpswp8_OsX(volatile void *ptr, int64_t valu
 
 static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand)
 {
-    __TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for Mac OS atomics");
+    __TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
     int32_t* address = (int32_t*)ptr;
     while( !OSAtomicCompareAndSwap32Barrier(comparand, value, address) ){
         int32_t snapshot = *address;
@@ -118,13 +118,13 @@ static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, i
 
 static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend)
 {
-    __TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for Mac OS atomics");
+    __TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
     return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend;
 }
 
 static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
 {
-    __TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for Mac OS atomics");
+    __TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
     return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend;
 }
 
diff --git a/include/tbb/machine/mic_common.h b/include/tbb/machine/mic_common.h
index ffc252e..ca423d8 100644
--- a/include/tbb/machine/mic_common.h
+++ b/include/tbb/machine/mic_common.h
@@ -46,7 +46,7 @@
 #define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1)
 #endif
 
-/** Early Intel(R) MIC Architecture does not support mfence and pause instructions **/
+/** Early Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
 #define __TBB_full_memory_fence __TBB_release_consistency_helper
 #define __TBB_Pause(x) _mm_delay_32(16*(x))
 #define __TBB_STEALING_PAUSE 1500/16
diff --git a/include/tbb/machine/windows_ia32.h b/include/tbb/machine/windows_ia32.h
index 491c392..43d21bc 100644
--- a/include/tbb/machine/windows_ia32.h
+++ b/include/tbb/machine/windows_ia32.h
@@ -137,7 +137,7 @@ static inline void __TBB_machine_AND( volatile void *operand, __int32 addend ) {
 #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
 #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
 
-//TODO: Check if it possible and profitable for IA-32 on (Linux and Windows)
+//TODO: Check if it possible and profitable for IA-32 architecture on (Linux and Windows)
 //to use of 64-bit load/store via floating point registers together with full fence
 //for sequentially consistent load/store, instead of CAS.
 #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE           1
diff --git a/include/tbb/memory_pool.h b/include/tbb/memory_pool.h
index da26d68..8205936 100644
--- a/include/tbb/memory_pool.h
+++ b/include/tbb/memory_pool.h
@@ -36,7 +36,7 @@
 
 #include "scalable_allocator.h"
 #include "tbb_stddef.h"
-#include "tbb_machine.h" // TODO: avoid linkage with libtbb on IA-64
+#include "tbb_machine.h" // TODO: avoid linkage with libtbb on IA-64 architecture
 #include "tbb/atomic.h" // for as_atomic
 #include <new> // std::bad_alloc
 #if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_CPP11_STD_FORWARD_BROKEN
diff --git a/include/tbb/partitioner.h b/include/tbb/partitioner.h
index c921beb..2839c4b 100644
--- a/include/tbb/partitioner.h
+++ b/include/tbb/partitioner.h
@@ -403,7 +403,7 @@ public:
         return my_divisor > 1;
     }
     bool should_create_trap() {
-        return true; // TODO: rethink for the stage after memorizing level
+        return false;
     }
     static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
 };
diff --git a/include/tbb/tbb_config.h b/include/tbb/tbb_config.h
index 920b9a3..769ba5a 100644
--- a/include/tbb/tbb_config.h
+++ b/include/tbb/tbb_config.h
@@ -420,7 +420,7 @@
 #endif
 
 //TODO: recheck for different clang versions 
-#if __GLIBC__==2 && __GLIBC_MINOR__==3 || __MINGW32__ || (__APPLE__ && (__clang__ || __INTEL_COMPILER==1200 && !TBB_USE_DEBUG))
+#if __GLIBC__==2 && __GLIBC_MINOR__==3 || __MINGW32__ || (__APPLE__ && ( __INTEL_COMPILER==1200 && !TBB_USE_DEBUG))
     /** Macro controlling EH usages in TBB tests.
         Some older versions of glibc crash when exception handling happens concurrently. **/
     #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1
diff --git a/include/tbb/tbb_machine.h b/include/tbb/tbb_machine.h
index aecbbdf..bd8579e 100644
--- a/include/tbb/tbb_machine.h
+++ b/include/tbb/tbb_machine.h
@@ -105,13 +105,13 @@
     __TBB_control_consistency_helper()
         Bridges the memory-semantics gap between architectures providing only
         implicit C++0x "consume" semantics (like Power Architecture) and those
-        also implicitly obeying control dependencies (like IA-64).
+        also implicitly obeying control dependencies (like IA-64 architecture).
         It must be used only in conditional code where the condition is itself
         data-dependent, and will then make subsequent code behave as if the
         original data dependency were acquired.
         It needs only a compiler fence where implied by the architecture
-        either specifically (like IA-64) or because generally stronger "acquire"
-        semantics are enforced (like x86).
+        either specifically (like IA-64 architecture) or because generally stronger 
+        "acquire" semantics are enforced (like x86).
         It is always valid, though potentially suboptimal, to replace
         control with acquire on the load and then remove the helper.
 
@@ -360,7 +360,12 @@ class atomic_backoff : no_copy {
     static const int32_t LOOPS_BEFORE_YIELD = 16;
     int32_t count;
 public:
+    // In many cases, an object of this type is initialized eagerly on hot path,
+    // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ }
+    // For this reason, the construction cost must be very small!
     atomic_backoff() : count(1) {}
+    // This constructor pauses immediately; do not use on hot paths!
+    atomic_backoff( bool ) : count(1) { pause(); }
 
     //! Pause for a while.
     void pause() {
@@ -452,7 +457,7 @@ inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, cons
     const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
     const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;
 
-    for(atomic_backoff b;;b.pause()) {
+    for( atomic_backoff b;;b.pause() ) {
         const uint32_t surroundings  = *aligned_ptr & ~mask ; // may have changed during the pause
         const uint32_t big_comparand = surroundings | shifted_comparand ;
         const uint32_t big_value     = surroundings | shifted_value     ;
@@ -506,28 +511,24 @@ inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, ui
 
 template<size_t S, typename T>
 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
-    atomic_backoff b;
     T result;
-    for(;;) {
+    for( atomic_backoff b;;b.pause() ) {
         result = *reinterpret_cast<volatile T *>(ptr);
         // __TBB_CompareAndSwapGeneric presumed to have full fence.
         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
             break;
-        b.pause();
     }
     return result;
 }
 
 template<size_t S, typename T>
 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
-    atomic_backoff b;
     T result;
-    for(;;) {
+    for( atomic_backoff b;;b.pause() ) {
         result = *reinterpret_cast<volatile T *>(ptr);
         // __TBB_CompareAndSwapGeneric presumed to have full fence.
         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
             break;
-        b.pause();
     }
     return result;
 }
@@ -864,24 +865,20 @@ inline intptr_t __TBB_Log2( uintptr_t x ) {
 
 #ifndef __TBB_AtomicOR
 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
-    tbb::internal::atomic_backoff b;
-    for(;;) {
+    for( tbb::internal::atomic_backoff b;;b.pause() ) {
         uintptr_t tmp = *(volatile uintptr_t *)operand;
         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
         if( result==tmp ) break;
-        b.pause();
     }
 }
 #endif
 
 #ifndef __TBB_AtomicAND
 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
-    tbb::internal::atomic_backoff b;
-    for(;;) {
+    for( tbb::internal::atomic_backoff b;;b.pause() ) {
         uintptr_t tmp = *(volatile uintptr_t *)operand;
         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
         if( result==tmp ) break;
-        b.pause();
     }
 }
 #endif
@@ -909,12 +906,8 @@ inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
 
 #ifndef __TBB_LockByte
 inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
-    if ( !__TBB_TryLockByte(flag) ) {
-        tbb::internal::atomic_backoff b;
-        do {
-            b.pause();
-        } while ( !__TBB_TryLockByte(flag) );
-    }
+    tbb::internal::atomic_backoff backoff;
+    while( !__TBB_TryLockByte(flag) ) backoff.pause();
     return 0;
 }
 #endif
diff --git a/include/tbb/tbb_stddef.h b/include/tbb/tbb_stddef.h
index 1c5b581..916f1ea 100644
--- a/include/tbb/tbb_stddef.h
+++ b/include/tbb/tbb_stddef.h
@@ -34,7 +34,7 @@
 #define TBB_VERSION_MINOR 1
 
 // Engineering-focused interface version
-#define TBB_INTERFACE_VERSION 6104
+#define TBB_INTERFACE_VERSION 6105
 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
 
 // The oldest major interface version still supported
@@ -241,7 +241,7 @@ const size_t NFS_MaxLineSize = 128;
     both as a way to have the compiler help enforce use of the label and to quickly rule out
     one potential issue.
 
-    Note however that, with some architecture/compiler combinations, e.g. on IA-64, "volatile" 
+    Note however that, with some architecture/compiler combinations, e.g. on IA-64 architecture, "volatile" 
     also has non-portable memory semantics that are needlessly expensive for "relaxed" operations.
 
     Note that this must only be applied to data that will not change bit patterns when cast to/from
diff --git a/src/Makefile b/src/Makefile
index 7649e9f..3e792d5 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -28,6 +28,10 @@ tbb_root?=..
 examples_root:=$(tbb_root)/examples
 include $(tbb_root)/build/common.inc
 
+#workaround for non-depend targets tbb and tbbmalloc which both depend on version_string.ver
+#According to documentation, recursively invoked make commands can process their targets in parallel
+.NOTPARALLEL:
+
 .PHONY: all tbb tbbmalloc tbbproxy test test_no_depends release debug examples clean
 
 all: release debug examples
diff --git a/src/index.html b/src/index.html
index d990480..29c6de5 100644
--- a/src/index.html
+++ b/src/index.html
@@ -2,7 +2,7 @@
 <BODY>
 
 <H2>Overview</H2>
-This directory contains the source code and unit tests for Threading Building Blocks.
+This directory contains the source code and unit tests for Intel® Threading Building Blocks.
 
 <H2>Directories</H2>
 <DL>
diff --git a/src/old/concurrent_queue_v2.cpp b/src/old/concurrent_queue_v2.cpp
index 9a4fafa..3c0a8da 100644
--- a/src/old/concurrent_queue_v2.cpp
+++ b/src/old/concurrent_queue_v2.cpp
@@ -236,14 +236,11 @@ concurrent_queue_base::~concurrent_queue_base() {
 void concurrent_queue_base::internal_push( const void* src ) {
     concurrent_queue_rep& r = *my_rep;
     concurrent_queue_rep::ticket k  = r.tail_counter++;
-    ptrdiff_t e = my_capacity;
-    if( e<concurrent_queue_rep::infinite_capacity ) {
+    if( my_capacity<concurrent_queue_rep::infinite_capacity ) {
+        // Capacity is limited, wait to not exceed it
         atomic_backoff backoff;
-        for(;;) {
-            if( (ptrdiff_t)(k-r.head_counter)<e ) break;
+        while( (ptrdiff_t)(k-r.head_counter)>=const_cast<volatile ptrdiff_t&>(my_capacity) )
             backoff.pause();
-            e = const_cast<volatile ptrdiff_t&>(my_capacity);
-        }
     }
     r.choose(k).push(src,k,*this);
 }
@@ -260,7 +257,7 @@ bool concurrent_queue_base::internal_pop_if_present( void* dst ) {
     concurrent_queue_rep& r = *my_rep;
     concurrent_queue_rep::ticket k;
     do {
-        for( atomic_backoff backoff;;backoff.pause() ) {
+        for( atomic_backoff b;;b.pause() ) {
             k = r.head_counter;
             if( r.tail_counter<=k ) {
                 // Queue is empty
@@ -279,7 +276,7 @@ bool concurrent_queue_base::internal_pop_if_present( void* dst ) {
 bool concurrent_queue_base::internal_push_if_not_full( const void* src ) {
     concurrent_queue_rep& r = *my_rep;
     concurrent_queue_rep::ticket k;
-    for( atomic_backoff backoff;;backoff.pause() ) {
+    for( atomic_backoff b;;b.pause() ) {
         k = r.tail_counter;
         if( (ptrdiff_t)(k-r.head_counter)>=my_capacity ) {
             // Queue is full
diff --git a/src/old/concurrent_vector_v2.cpp b/src/old/concurrent_vector_v2.cpp
index 14be6e7..2129b16 100644
--- a/src/old/concurrent_vector_v2.cpp
+++ b/src/old/concurrent_vector_v2.cpp
@@ -93,11 +93,9 @@ void concurrent_vector_base::helper::extend_segment( concurrent_vector_base& v )
     // If other threads are trying to set pointers in the short segment, wait for them to finish their
     // assignments before we copy the short segment to the long segment.
     atomic_backoff backoff;
-    while( !v.my_storage[0].array || !v.my_storage[1].array ) {
-        backoff.pause();
-    }
-    s[0] = v.my_storage[0]; 
-    s[1] = v.my_storage[1]; 
+    while( !v.my_storage[0].array || !v.my_storage[1].array ) backoff.pause();
+    s[0] = v.my_storage[0];
+    s[1] = v.my_storage[1];
     if( v.my_segment.compare_and_swap( s, v.my_storage )!=v.my_storage )
         NFS_Free(s);
 }
diff --git a/src/old/spin_rw_mutex_v2.cpp b/src/old/spin_rw_mutex_v2.cpp
index 30cf46e..d3a6ef7 100644
--- a/src/old/spin_rw_mutex_v2.cpp
+++ b/src/old/spin_rw_mutex_v2.cpp
@@ -100,8 +100,8 @@ bool spin_rw_mutex::internal_upgrade(spin_rw_mutex *mutex) {
         if( CAS(mutex->state, s | WRITER_PENDING, s) )
         {
             ITT_NOTIFY(sync_prepare, mutex);
-            for( atomic_backoff backoff; (mutex->state & READERS) != ONE_READER; )
-                backoff.pause(); // while more than 1 reader
+            atomic_backoff backoff;
+            while( (mutex->state & READERS) != ONE_READER ) backoff.pause();
             __TBB_ASSERT(mutex->state == (ONE_READER | WRITER_PENDING),"invalid state when upgrading to writer");
             // both new readers and writers are blocked at this time
             mutex->state = WRITER;
@@ -139,7 +139,7 @@ void spin_rw_mutex::internal_release_reader(spin_rw_mutex *mutex)
 bool spin_rw_mutex::internal_try_acquire_writer( spin_rw_mutex * mutex )
 {
     // for a writer: only possible to acquire if no active readers or writers
-    state_t s = mutex->state; // on IA-64, this volatile load has acquire semantic
+    state_t s = mutex->state; // on IA-64 architecture, this volatile load has acquire semantic
     if( !(s & BUSY) ) // no readers, no writers; mask is 1..1101
         if( CAS(mutex->state, WRITER, s) ) {
             ITT_NOTIFY(sync_acquired, mutex);
@@ -152,7 +152,7 @@ bool spin_rw_mutex::internal_try_acquire_writer( spin_rw_mutex * mutex )
 bool spin_rw_mutex::internal_try_acquire_reader( spin_rw_mutex * mutex )
 {
     // for a reader: acquire if no active or waiting writers
-    state_t s = mutex->state;    // on IA-64, a load of volatile variable has acquire semantic
+    state_t s = mutex->state;    // on IA-64 architecture, a load of volatile variable has acquire semantic
     while( !(s & (WRITER|WRITER_PENDING)) ) // no writers
         if( CAS(mutex->state, s+ONE_READER, s) ) {
             ITT_NOTIFY(sync_acquired, mutex);
diff --git a/src/rml/server/irml.rc b/src/rml/server/irml.rc
index 6b1974a..27fd0ff 100644
--- a/src/rml/server/irml.rc
+++ b/src/rml/server/irml.rc
@@ -87,7 +87,7 @@ BEGIN
         BLOCK "000004b0"
         BEGIN
             VALUE "CompanyName", "Intel Corporation\0"
-            VALUE "FileDescription", "Threading Building Blocks resource manager library\0"
+            VALUE "FileDescription", "Intel(R) Threading Building Blocks resource manager library\0"
             VALUE "FileVersion", TBB_VERSION "\0"
 //what is it?            VALUE "InternalName", "irml\0"
             VALUE "LegalCopyright", "Copyright 2005-2013 Intel Corporation.  All Rights Reserved.\0"
diff --git a/src/rml/server/rml_server.cpp b/src/rml/server/rml_server.cpp
index b8dc62c..298d573 100644
--- a/src/rml/server/rml_server.cpp
+++ b/src/rml/server/rml_server.cpp
@@ -1840,14 +1840,9 @@ void omp_connection_v2::get_threads( size_type request_size, void* cookie, job*
 void omp_dispatch_type::consume() {
     // Wait for short window between when master sets state of this thread to ts_omp_busy
     // and master thread calls produce.
-    job_type* j = job; 
-    if( !j ) {
-        tbb::internal::atomic_backoff bo;
-        do {
-            bo.pause();
-            j = job;
-        } while( !j );
-    }
+    job_type* j;
+    tbb::internal::atomic_backoff backoff;
+    while( (j = job)==NULL ) backoff.pause();
     job = static_cast<job_type*>(NULL);
     client->process(*j,cookie,index);
 #if TBB_USE_ASSERT
@@ -3154,13 +3149,8 @@ void connection_scavenger_thread::add_request( generic_connection<Server,Client>
 {
     uintptr_t conn_ex = (uintptr_t)conn_to_close | (connection_traits<Server,Client>::is_tbb<<1);
     __TBB_ASSERT( !conn_to_close->next_conn, NULL );
-    uintptr_t old_tail_ex = connections_to_reclaim.tail;
+    const uintptr_t old_tail_ex = connections_to_reclaim.tail.fetch_and_store(conn_ex);
     __TBB_ASSERT( old_tail_ex==0||old_tail_ex>garbage_connection_queue::plugged_acked, "Unloading DLL called while this connection is being closed?" );
-    tbb::internal::atomic_backoff backoff;
-    while( connections_to_reclaim.tail.compare_and_swap( conn_ex, old_tail_ex )!=old_tail_ex ) {
-        backoff.pause();
-        old_tail_ex = connections_to_reclaim.tail;
-    }
 
     if( old_tail_ex==garbage_connection_queue::empty )
         connections_to_reclaim.head = conn_ex;
diff --git a/src/rml/test/test_thread_monitor.cpp b/src/rml/test/test_thread_monitor.cpp
index 42be091..025bb08 100644
--- a/src/rml/test/test_thread_monitor.cpp
+++ b/src/rml/test/test_thread_monitor.cpp
@@ -78,7 +78,7 @@ void ThreadState::loop() {
     }
 }
 
-// Linux on IA-64 seems to require at least 1<<18 bytes per stack.
+// Linux on IA-64 architecture seems to require at least 1<<18 bytes per stack.
 const size_t MinStackSize = 1<<18;
 const size_t MaxStackSize = 1<<22;
 
diff --git a/src/tbb/cilk-tbb-interop.h b/src/tbb/cilk-tbb-interop.h
index 9cd1a21..6e2318b 100644
--- a/src/tbb/cilk-tbb-interop.h
+++ b/src/tbb/cilk-tbb-interop.h
@@ -26,7 +26,8 @@
     the GNU General Public License.
 */
 
-/* The API to enable interoperability between Intel(R) Cilk(tm) Plus and TBB. */
+/* The API to enable interoperability between Intel(R) Cilk(TM) Plus and 
+   Intel(R) Threading Building Blocks. */
 
 #ifndef CILK_TBB_INTEROP_H
 #define CILK_TBB_INTEROP_H
diff --git a/src/tbb/concurrent_queue.cpp b/src/tbb/concurrent_queue.cpp
index 6c7750b..8094906 100644
--- a/src/tbb/concurrent_queue.cpp
+++ b/src/tbb/concurrent_queue.cpp
@@ -194,17 +194,16 @@ void micro_queue::push( const void* item, ticket k, concurrent_queue_base& base
     }
 
     // wait for my turn
-    if( tail_counter!=k ) {
-        atomic_backoff backoff;
-        do {
-            backoff.pause();
-            // no memory. throws an exception; assumes concurrent_queue_rep::n_queue>1
-            if( tail_counter&0x1 ) {
+    if( tail_counter!=k ) // The developer insisted on keeping first check out of the backoff loop
+        for( atomic_backoff b(true);;b.pause() ) {
+            ticket tail = tail_counter;
+            if( tail==k ) break;
+            else if( tail&0x1 ) {
+                // no memory. throws an exception; assumes concurrent_queue_rep::n_queue>1
                 ++base.my_rep->n_invalid_entries;
                 throw_exception( eid_bad_last_alloc );
             }
-        } while( tail_counter!=k ) ;
-    }
+        }
 
     if( p ) { // page is newly allocated; insert in micro_queue
         spin_mutex::scoped_lock lock( page_mutex );
diff --git a/src/tbb/dynamic_link.h b/src/tbb/dynamic_link.h
index 00d7618..0b58516 100644
--- a/src/tbb/dynamic_link.h
+++ b/src/tbb/dynamic_link.h
@@ -111,7 +111,7 @@ enum dynamic_link_error_t {
     dl_success = 0,
     dl_lib_not_found,     // char const * lib, dlerr_t err
     dl_sym_not_found,     // char const * sym, dlerr_t err
-                          // Note: dlerr_t depends on OS: it is char const * on Linux and Mac, int on Windows.
+                          // Note: dlerr_t depends on OS: it is char const * on Linux* and OS X*, int on Windows*.
     dl_sys_fail,          // char const * func, int err
     dl_buff_too_small     // none
 }; // dynamic_link_error_t
diff --git a/src/tbb/governor.cpp b/src/tbb/governor.cpp
index 0f2b469..bb95f73 100644
--- a/src/tbb/governor.cpp
+++ b/src/tbb/governor.cpp
@@ -46,7 +46,7 @@ namespace internal {
 //------------------------------------------------------------------------
 
 #if __TBB_SURVIVE_THREAD_SWITCH
-// Support for interoperability with Intel(R) Cilk(tm) Plus.
+// Support for interoperability with Intel(R) Cilk(TM) Plus.
 
 #if _WIN32
 #define CILKLIB_NAME "cilkrts20.dll"
diff --git a/src/tbb/index.html b/src/tbb/index.html
index d1bf5a7..315c1bc 100644
--- a/src/tbb/index.html
+++ b/src/tbb/index.html
@@ -13,7 +13,7 @@ This directory contains the source code of the TBB core components.
 <DT><A HREF="ia32-masm">ia32-masm</A>
 <DD>Assembly code for IA32 architecture.
 <DT><A HREF="ia64-gas">ia64-gas</A>
-<DD>Assembly code for IA64 architecture.
+<DD>Assembly code for IA-64 architecture.
 <DT><A HREF="ibm_aix51">ibm_aix51</A>
 <DD>Assembly code for AIX 5.1 port.
 </DL>
diff --git a/src/tbb/mac32-tbb-export.lst b/src/tbb/mac32-tbb-export.lst
index d5b2aa3..a11c15a 100644
--- a/src/tbb/mac32-tbb-export.lst
+++ b/src/tbb/mac32-tbb-export.lst
@@ -29,7 +29,7 @@
 #include "tbb/tbb_config.h"
 
 /*
-    Sometimes Mac OS X requires leading underscore (e. g. in export list file), but sometimes not
+    Sometimes OS X* requires leading underscore (e. g. in export list file), but sometimes not
     (e. g. when searching symbol in a dynamic library via dlsym()). Symbols in this file SHOULD
     be listed WITHOUT one leading underscore. __TBB_SYMBOL macro should add underscore when
     necessary, depending on the indended usage.
diff --git a/src/tbb/mac64-tbb-export.lst b/src/tbb/mac64-tbb-export.lst
index 27237a7..3cadedf 100644
--- a/src/tbb/mac64-tbb-export.lst
+++ b/src/tbb/mac64-tbb-export.lst
@@ -29,7 +29,7 @@
 #include "tbb/tbb_config.h"
 
 /*
-    Sometimes Mac OS X requires leading underscore (e. g. in export list file), but sometimes not
+    Sometimes OS X* requires leading underscore (e. g. in export list file), but sometimes not
     (e. g. when searching symbol in a dynamic library via dlsym()). Symbols in this file SHOULD
     be listed WITHOUT one leading underscore. __TBB_SYMBOL macro should add underscore when
     necessary, depending on the indended usage.
diff --git a/src/tbb/mailbox.h b/src/tbb/mailbox.h
index 4d5feef..ac517bb 100644
--- a/src/tbb/mailbox.h
+++ b/src/tbb/mailbox.h
@@ -133,7 +133,8 @@ class mail_outbox : unpadded_mail_outbox {
             } else {
                 // Some other thread updated my_last but has not filled in first->next_in_mailbox
                 // Wait until first item points to second item.
-                for( atomic_backoff backoff; !(second = first->next_in_mailbox); backoff.pause() ) {}
+                atomic_backoff backoff;
+                while( !(second = first->next_in_mailbox) ) backoff.pause();
                 my_first = second;
             }
         }
diff --git a/src/tbb/queuing_rw_mutex.cpp b/src/tbb/queuing_rw_mutex.cpp
index 78a1c60..9509ec9 100644
--- a/src/tbb/queuing_rw_mutex.cpp
+++ b/src/tbb/queuing_rw_mutex.cpp
@@ -424,14 +424,12 @@ requested:
         if( n_state & (STATE_COMBINED_READER | STATE_UPGRADE_REQUESTED) ) {
             // save n|FLAG for simplicity of following comparisons
             tmp = tricky_pointer(n)|FLAG;
-            atomic_backoff backoff;
-            while(__TBB_load_relaxed(my_next)==tmp) {
+            for( atomic_backoff b; __TBB_load_relaxed(my_next)==tmp; b.pause() ) {
                 if( my_state & STATE_COMBINED_UPGRADING ) {
                     if( __TBB_load_with_acquire(my_next)==tmp )
                         __TBB_store_relaxed(my_next, n);
                     goto waiting;
                 }
-                backoff.pause();
             }
             __TBB_ASSERT(__TBB_load_relaxed(my_next) != (tricky_pointer(n)|FLAG), NULL);
             goto requested;
diff --git a/src/tbb/scheduler.cpp b/src/tbb/scheduler.cpp
index f2ed613..db6ca58 100644
--- a/src/tbb/scheduler.cpp
+++ b/src/tbb/scheduler.cpp
@@ -226,7 +226,7 @@ void generic_scheduler::init_stack_info () {
                 if ( 0 == pthread_attr_getstacksize(&attr_stack, &stack_size) ) {
                     if ( np_stack_size < stack_size ) {
                         // We are in a secondary thread. Use reliable data.
-                        // IA64 stack is split into RSE backup and memory parts
+                        // IA-64 architecture stack is split into RSE backup and memory parts
                         rsb_base = stack_limit;
                         stack_size = np_stack_size/2;
                         // Limit of the memory part of the stack
@@ -238,7 +238,7 @@ void generic_scheduler::init_stack_info () {
                 }
                 pthread_attr_destroy(&attr_stack);
             }
-            // IA64 stack is split into RSE backup and memory parts
+            // IA-64 architecture stack is split into RSE backup and memory parts
             my_rsb_stealing_threshold = (uintptr_t)((char*)rsb_base + stack_size/2);
 #endif /* __TBB_ipf */
             // Size of the stack free part 
@@ -479,9 +479,8 @@ size_t generic_scheduler::prepare_task_pool ( size_t num_tasks ) {
 inline void generic_scheduler::acquire_task_pool() const {
     if ( !in_arena() )
         return; // we are not in arena - nothing to lock
-    atomic_backoff backoff;
     bool sync_prepare_done = false;
-    for(;;) {
+    for( atomic_backoff b;;b.pause() ) {
 #if TBB_USE_ASSERT
         __TBB_ASSERT( my_arena_slot == my_arena->my_slots + my_arena_index, "invalid arena slot index" );
         // Local copy of the arena slot task pool pointer is necessary for the next
@@ -502,7 +501,6 @@ inline void generic_scheduler::acquire_task_pool() const {
             sync_prepare_done = true;
         }
         // Someone else acquired a lock, so pause and do exponential backoff.
-        backoff.pause();
     }
     __TBB_ASSERT( my_arena_slot->task_pool == LockedTaskPool, "not really acquired task pool" );
 } // generic_scheduler::acquire_task_pool
@@ -524,9 +522,8 @@ inline void generic_scheduler::release_task_pool() const {
     Thus if any of them is changed, consider changing the counterpart as well **/
 inline task** generic_scheduler::lock_task_pool( arena_slot* victim_arena_slot ) const {
     task** victim_task_pool;
-    atomic_backoff backoff;
     bool sync_prepare_done = false;
-    for(;;) {
+    for( atomic_backoff backoff;; /*backoff pause embedded in the loop*/) {
         victim_task_pool = victim_arena_slot->task_pool;
         // NOTE: Do not use comparison of head and tail indices to check for
         // the presence of work in the victim's task pool, as they may give
@@ -1229,23 +1226,21 @@ void generic_scheduler::cleanup_master() {
 
     However this version of the algorithm requires more analysis and verification.
 
-3.  There is no portable way to get stack base address in Posix, however
-    the modern Linux versions provide pthread_attr_np API that can be used
-    to obtain thread's stack size and base address. Unfortunately even this
-    function does not provide enough information for the main thread on IA64
-    (RSE spill area and memory stack are allocated as two separate discontinuous
-    chunks of memory), and there is no portable way to discern the main and
-    the secondary threads.
-    Thus for MacOS and IA64 Linux we use the TBB worker stack size for all
-    threads and use the current stack top as the stack base. This simplified
+3.  There is no portable way to get stack base address in Posix, however the modern
+    Linux versions provide pthread_attr_np API that can be used to obtain thread's
+    stack size and base address. Unfortunately even this function does not provide
+    enough information for the main thread on IA-64 architecture (RSE spill area
+    and memory stack are allocated as two separate discontinuous chunks of memory),
+    and there is no portable way to discern the main and the secondary threads.
+    Thus for OS X* and Linux on IA-64 architecture we use the TBB worker stack size for
+    all threads and use the current stack top as the stack base. This simplified 
     approach is based on the following assumptions:
-    1) If the default stack size is insufficient for the user app needs,
-       the required amount will be explicitly specified by the user at
-       the point of the TBB scheduler initialization (as an argument to
-       tbb::task_scheduler_init constructor).
-    2) When a master thread initializes the scheduler, it has enough space
-       on its stack. Here "enough" means "at least as much as worker threads
-       have".
-    3) If the user app strives to conserve the memory by cutting stack size,
-       it should do this for TBB workers too (as in the #1).
+    1) If the default stack size is insufficient for the user app needs, the
+    required amount will be explicitly specified by the user at the point of the
+    TBB scheduler initialization (as an argument to tbb::task_scheduler_init
+    constructor).
+    2) When a master thread initializes the scheduler, it has enough space on its
+    stack. Here "enough" means "at least as much as worker threads have".
+    3) If the user app strives to conserve the memory by cutting stack size, it
+    should do this for TBB workers too (as in the #1).
 */
diff --git a/src/tbb/spin_rw_mutex.cpp b/src/tbb/spin_rw_mutex.cpp
index 586b55b..5e81ea6 100644
--- a/src/tbb/spin_rw_mutex.cpp
+++ b/src/tbb/spin_rw_mutex.cpp
@@ -74,7 +74,7 @@ void spin_rw_mutex_v3::internal_release_writer()
 void spin_rw_mutex_v3::internal_acquire_reader()
 {
     ITT_NOTIFY(sync_prepare, this);
-    for( internal::atomic_backoff backoff;;backoff.pause() ){
+    for( internal::atomic_backoff b;;b.pause() ){
         state_t s = const_cast<volatile state_t&>(state); // ensure reloading
         if( !(s & (WRITER|WRITER_PENDING)) ) { // no writer or write requests
             state_t t = (state_t)__TBB_FetchAndAddW( &state, (intptr_t) ONE_READER );
@@ -102,8 +102,8 @@ bool spin_rw_mutex_v3::internal_upgrade()
         state_t old_s = s;
         if( (s=CAS(state, s | WRITER | WRITER_PENDING, s))==old_s ) {
             ITT_NOTIFY(sync_prepare, this);
-            for( internal::atomic_backoff backoff; (state & READERS) != ONE_READER; )
-                backoff.pause(); // while more than 1 reader
+            internal::atomic_backoff backoff;
+            while( (state & READERS) != ONE_READER ) backoff.pause();
             __TBB_ASSERT((state&(WRITER_PENDING|WRITER))==(WRITER_PENDING|WRITER),"invalid state when upgrading to writer");
             // both new readers and writers are blocked at this time
             __TBB_FetchAndAddW( &state,  - (intptr_t)(ONE_READER+WRITER_PENDING));
diff --git a/src/tbb/tbb_assert_impl.h b/src/tbb/tbb_assert_impl.h
index 2dca330..b276c91 100644
--- a/src/tbb/tbb_assert_impl.h
+++ b/src/tbb/tbb_assert_impl.h
@@ -38,7 +38,6 @@
 #include <stdarg.h>
 #if _MSC_VER
 #include <crtdbg.h>
-#define __TBB_USE_DBGBREAK_DLG TBB_USE_DEBUG
 #endif
 
 #if _MSC_VER >= 1400
@@ -76,7 +75,7 @@ namespace tbb {
                          expression, line, filename );
                 if( comment )
                     fprintf( stderr, "Detailed description: %s\n", comment );
-#if __TBB_USE_DBGBREAK_DLG
+#if _MSC_VER && _DEBUG
                 if(1 == _CrtDbgReport(_CRT_ASSERT, filename, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:""))
                         _CrtDbgBreak();
 #else
diff --git a/src/tbb/tbb_misc.cpp b/src/tbb/tbb_misc.cpp
index 54ac455..8f3ffb6 100644
--- a/src/tbb/tbb_misc.cpp
+++ b/src/tbb/tbb_misc.cpp
@@ -228,7 +228,7 @@ done:;
 
 //! Handle 8-byte store that crosses a cache line.
 extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value ) {
-    for( tbb::internal::atomic_backoff b;; b.pause() ) {
+    for( tbb::internal::atomic_backoff b;;b.pause() ) {
         int64_t tmp = *(int64_t*)ptr;
         if( __TBB_machine_cmpswp8(ptr,value,tmp)==tmp ) 
             break;
@@ -239,16 +239,12 @@ extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value ) {
 #endif /* !__TBB_RML_STATIC */
 
 #if __TBB_ipf
-/* It was found that on IPF inlining of __TBB_machine_lockbyte leads
-   to serious performance regression with ICC 10.0. So keep it out-of-line.
+/* It was found that on IA-64 architecture inlining of __TBB_machine_lockbyte leads
+   to serious performance regression with ICC. So keep it out-of-line.
  */
 extern "C" intptr_t __TBB_machine_lockbyte( volatile unsigned char& flag ) {
-    if ( !__TBB_TryLockByte(flag) ) {
-        tbb::internal::atomic_backoff b;
-        do {
-            b.pause();
-        } while ( !__TBB_TryLockByte(flag) );
-    }
+    tbb::internal::atomic_backoff backoff;
+    while( !__TBB_TryLockByte(flag) ) backoff.pause();
     return 0;
 }
 #endif
diff --git a/src/tbb/tbb_resource.rc b/src/tbb/tbb_resource.rc
index e143e75..1a90397 100644
--- a/src/tbb/tbb_resource.rc
+++ b/src/tbb/tbb_resource.rc
@@ -87,7 +87,7 @@ BEGIN
         BLOCK "000004b0"
         BEGIN
             VALUE "CompanyName", "Intel Corporation\0"
-            VALUE "FileDescription", "Threading Building Blocks library\0"
+            VALUE "FileDescription", "Intel(R) Threading Building Blocks library\0"
             VALUE "FileVersion", TBB_VERSION "\0"
 //what is it?            VALUE "InternalName", "tbb\0"
             VALUE "LegalCopyright", "Copyright 2005-2013 Intel Corporation.  All Rights Reserved.\0"
diff --git a/src/tbb/tools_api/ittnotify.h b/src/tbb/tools_api/ittnotify.h
index 9142709..0a4a690 100644
--- a/src/tbb/tools_api/ittnotify.h
+++ b/src/tbb/tools_api/ittnotify.h
@@ -37,7 +37,8 @@
 The ITT API is used to annotate a user's program with additional information
 that can be used by correctness and performance tools. The user inserts
 calls in their program. Those calls generate information that is collected
-at runtime, and used by tools such as Amplifier and Inspector.
+at runtime, and used by tools such as Intel(R) Parallel Amplifier and
+Intel(R) Parallel Inspector.
 
 @section API Concepts
 The following general concepts are used throughout the API.
diff --git a/src/tbb/tools_api/ittnotify_config.h b/src/tbb/tools_api/ittnotify_config.h
index 02077a8..c5d082f 100644
--- a/src/tbb/tools_api/ittnotify_config.h
+++ b/src/tbb/tools_api/ittnotify_config.h
@@ -256,7 +256,7 @@ INLINE int __itt_interlocked_increment(volatile long* ptr)
 #ifdef __INTEL_COMPILER
 #define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
 #else  /* __INTEL_COMPILER */
-/* TODO: Add Support for not Intel compilers for IA64 */
+/* TODO: Add support for non-Intel compilers for IA-64 architecture */
 #endif /* __INTEL_COMPILER */
 #else /* ITT_ARCH!=ITT_ARCH_IA64 */
 INLINE int __TBB_machine_fetchadd4(volatile void* ptr, long addend)
diff --git a/src/tbbmalloc/MapMemory.h b/src/tbbmalloc/MapMemory.h
index 76a4646..0fd49e7 100644
--- a/src/tbbmalloc/MapMemory.h
+++ b/src/tbbmalloc/MapMemory.h
@@ -71,7 +71,7 @@ void* MapMemory (size_t bytes, bool hugePages)
     void* result = 0;
     int prevErrno = errno;
 #ifndef MAP_ANONYMOUS
-// Mac OS* X defines MAP_ANON, which is deprecated in Linux.
+// OS X* defines MAP_ANON, which is deprecated in Linux*.
 #define MAP_ANONYMOUS MAP_ANON
 #endif /* MAP_ANONYMOUS */
     int addFlags = hugePages? __TBB_MAP_HUGETLB : 0;
diff --git a/src/tbbmalloc/backend.cpp b/src/tbbmalloc/backend.cpp
index f87e3b9..5224959 100644
--- a/src/tbbmalloc/backend.cpp
+++ b/src/tbbmalloc/backend.cpp
@@ -792,19 +792,6 @@ void AllLargeBlocksList::remove(LargeMemoryBlock *lmb)
         lmb->gPrev->gNext = lmb->gNext;
 }
 
-void AllLargeBlocksList::removeAll(Backend *backend)
-{
-    LargeMemoryBlock *next, *lmb = loHead;
-    loHead = NULL;
-
-    for (; lmb; lmb = next) {
-        next = lmb->gNext;
-        // nothing left to AllLargeBlocksList::remove
-        lmb->gNext = lmb->gPrev = NULL;
-        backend->returnLargeObject(lmb);
-    }
-}
-
 void Backend::putLargeBlock(LargeMemoryBlock *lmb)
 {
     if (extMemPool->mustBeAddedToGlobalLargeBlockList())
diff --git a/src/tbbmalloc/frontend.cpp b/src/tbbmalloc/frontend.cpp
index ee6063b..bbd5fe2 100644
--- a/src/tbbmalloc/frontend.cpp
+++ b/src/tbbmalloc/frontend.cpp
@@ -308,7 +308,7 @@ MallocMutex  MemoryPool::memPoolListLock;
 // TODO: move huge page status to default pool, because that's its states
 HugePagesStatus hugePages;
 
-// Slab block is 16KB-aligned. To prvent false sharing, separate locally-accessed
+// Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
 // fields and fields commonly accessed by not owner threads.
 class GlobalBlockFields : public BlockI {
 protected:
@@ -831,6 +831,25 @@ done:
 
 /********* Thread and block related code      *************/
 
+ template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
+     LargeMemoryBlock *next, *lmb = loHead;
+     loHead = NULL;
+
+     for (; lmb; lmb = next) {
+         next = lmb->gNext;
+         if (poolDestroy) {
+             // as it's pool destruction, no need to return object to backend,
+             // only remove backrefs, as they are global
+             removeBackRef(lmb->backRefIdx);
+         } else {
+             // clean g(Next|Prev) to prevent removing lmb
+             // from AllLargeBlocksList inside returnLargeObject
+             lmb->gNext = lmb->gPrev = NULL;
+             backend->returnLargeObject(lmb);
+         }
+     }
+}
+
 TLSData* MemoryPool::getTLS(bool create)
 {
     TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
@@ -955,12 +974,14 @@ bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
 
 void MemoryPool::reset()
 {
+    MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
     // memory is not releasing during pool reset
     // TODO: mark regions to release unused on next reset()
     extMemPool.delayRegionsReleasing(true);
 
     bootStrapBlocks.reset();
     orphanedBlocks.reset();
+    extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
     extMemPool.reset();
 
     extMemPool.initTLS();
@@ -979,12 +1000,8 @@ void MemoryPool::destroy()
     }
     // slab blocks in non-default pool do not have backreferencies,
     // only large objects do
-    for (LargeMemoryBlock *lmb = extMemPool.lmbList.getHead(); lmb; ) {
-        LargeMemoryBlock *next = lmb->gNext;
-        if (extMemPool.userPool())
-            removeBackRef(lmb->backRefIdx);
-        lmb = next;
-    }
+    if (extMemPool.userPool())
+        extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
     extMemPool.destroy();
 }
 
diff --git a/src/tbbmalloc/tbbmalloc.cpp b/src/tbbmalloc/tbbmalloc.cpp
index 5870167..a041474 100644
--- a/src/tbbmalloc/tbbmalloc.cpp
+++ b/src/tbbmalloc/tbbmalloc.cpp
@@ -221,18 +221,14 @@ void __TBB_internal_free(void *object)
 } } // namespaces
 
 #if __TBB_ipf
-/* It was found that on IPF inlining of __TBB_machine_lockbyte leads
-   to serious performance regression with ICC 10.0. So keep it out-of-line.
+/* It was found that on IA-64 architecture inlining of __TBB_machine_lockbyte leads
+   to serious performance regression with ICC. So keep it out-of-line.
 
    This code is copy-pasted from tbb_misc.cpp.
  */
 extern "C" intptr_t __TBB_machine_lockbyte( volatile unsigned char& flag ) {
-    if ( !__TBB_TryLockByte(flag) ) {
-        tbb::internal::atomic_backoff b;
-        do {
-            b.pause();
-        } while ( !__TBB_TryLockByte(flag) );
-    }
+    tbb::internal::atomic_backoff backoff;
+    while( !__TBB_TryLockByte(flag) ) backoff.pause();
     return 0;
 }
 #endif
diff --git a/src/tbbmalloc/tbbmalloc_internal.h b/src/tbbmalloc/tbbmalloc_internal.h
index bf529cd..64853da 100644
--- a/src/tbbmalloc/tbbmalloc_internal.h
+++ b/src/tbbmalloc/tbbmalloc_internal.h
@@ -363,7 +363,7 @@ private:
     HugeCacheType hugeCache;
 
     /* logical time, incremented on each put/get operation
-       To prevent starvation between pools, keep separatly for each pool.
+       To prevent starvation between pools, keep separately for each pool.
        Overflow is OK, as we only want difference between
        its current value and some recent.
 
@@ -433,8 +433,8 @@ struct LargeMemoryBlock : public BlockI {
     LargeMemoryBlock *next,          // ptrs in list of cached blocks
                      *prev,
     // 2-linked list of pool's large objects
-    // Used to destroy backrefs on pool destroy/reset (backrefs are global)
-    // and for releasing all non-binned blocks.
+    // Used to destroy backrefs on pool destroy (backrefs are global)
+    // and for object releasing during pool reset.
                      *gPrev,
                      *gNext;
     uintptr_t         age;           // age of block while in cache
@@ -698,7 +698,70 @@ public:
     LargeMemoryBlock *getHead() { return loHead; }
     void add(LargeMemoryBlock *lmb);
     void remove(LargeMemoryBlock *lmb);
-    void removeAll(Backend *backend);
+    template<bool poolDestroy> void releaseAll(Backend *backend);
+};
+
+struct ExtMemoryPool {
+    Backend           backend;
+
+    intptr_t          poolId;
+    // to find all large objects
+    AllLargeBlocksList lmbList;
+    // Callbacks to be used instead of MapMemory/UnmapMemory.
+    rawAllocType      rawAlloc;
+    rawFreeType       rawFree;
+    size_t            granularity;
+    bool              keepAllMemory,
+                      delayRegsReleasing,
+                      fixedPool;
+    TLSKey            tlsPointerKey;  // per-pool TLS key
+
+    LargeObjectCache  loc;
+
+    bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree,
+              size_t granularity, bool keepAllMemory, bool fixedPool);
+    void initTLS();
+
+    // i.e., not system default pool for scalable_malloc/scalable_free
+    bool userPool() const { return rawAlloc; }
+
+     // true if something has been released
+    bool softCachesCleanup();
+    bool releaseTLCaches();
+    // TODO: to release all thread's pools, not just current thread
+    bool hardCachesCleanup();
+    void reset() {
+        loc.reset();
+        tlsPointerKey.~TLSKey();
+        backend.reset();
+    }
+    void destroy() {
+        // pthread_key_dtors must be disabled before memory unmapping
+        // TODO: race-free solution
+        tlsPointerKey.~TLSKey();
+        if (rawFree || !userPool())
+            backend.destroy();
+    }
+    bool mustBeAddedToGlobalLargeBlockList() const { return userPool(); }
+    void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; }
+    inline bool regionsAreReleaseable() const;
+
+    LargeMemoryBlock *mallocLargeObject(size_t allocationSize);
+    void freeLargeObject(LargeMemoryBlock *lmb);
+    void freeLargeObjectList(LargeMemoryBlock *head);
+};
+
+inline bool Backend::inUserPool() const { return extMemPool->userPool(); }
+
+struct LargeObjectHdr {
+    LargeMemoryBlock *memoryBlock;
+    /* Backreference points to LargeObjectHdr.
+       Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */
+    BackRefIdx       backRefIdx;
+};
+
+struct FreeObject {
+    FreeObject  *next;
 };
 
 // An TBB allocator mode that can be controlled by user
@@ -770,70 +833,6 @@ public:
 
 extern HugePagesStatus hugePages;
 
-struct ExtMemoryPool {
-    Backend           backend;
-
-    intptr_t          poolId;
-    // to find all large objects
-    AllLargeBlocksList lmbList;
-    // Callbacks to be used instead of MapMemory/UnmapMemory.
-    rawAllocType      rawAlloc;
-    rawFreeType       rawFree;
-    size_t            granularity;
-    bool              keepAllMemory,
-                      delayRegsReleasing,
-                      fixedPool;
-    TLSKey            tlsPointerKey;  // per-pool TLS key
-
-    LargeObjectCache  loc;
-
-    bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree,
-              size_t granularity, bool keepAllMemory, bool fixedPool);
-    void initTLS();
-
-    // i.e., not system default pool for scalable_malloc/scalable_free
-    bool userPool() const { return rawAlloc; }
-
-     // true if something has beed released
-    bool softCachesCleanup();
-    bool releaseTLCaches();
-    // TODO: to release all thread's pools, not just current thread
-    bool hardCachesCleanup();
-    void reset() {
-        lmbList.removeAll(&backend);
-        loc.reset();
-        tlsPointerKey.~TLSKey();
-        backend.reset();
-    }
-    void destroy() {
-        // pthread_key_dtors must be disabled before memory unmapping
-        // TODO: race-free solution
-        tlsPointerKey.~TLSKey();
-        if (rawFree || !userPool())
-            backend.destroy();
-    }
-    bool mustBeAddedToGlobalLargeBlockList() const { return userPool(); }
-    void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; }
-    inline bool regionsAreReleaseable() const;
-
-    LargeMemoryBlock *mallocLargeObject(size_t allocationSize);
-    void freeLargeObject(LargeMemoryBlock *lmb);
-    void freeLargeObjectList(LargeMemoryBlock *head);
-};
-
-inline bool Backend::inUserPool() const { return extMemPool->userPool(); }
-
-struct LargeObjectHdr {
-    LargeMemoryBlock *memoryBlock;
-    /* Backreference points to LargeObjectHdr.
-       Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */
-    BackRefIdx       backRefIdx;
-};
-
-struct FreeObject {
-    FreeObject  *next;
-};
-
 /******* A helper class to support overriding malloc with scalable_malloc *******/
 #if MALLOC_CHECK_RECURSION
 
diff --git a/src/test/harness.h b/src/test/harness.h
index 95b6bc6..5c93e87 100644
--- a/src/test/harness.h
+++ b/src/test/harness.h
@@ -65,6 +65,7 @@ int TestMain ();
 #if __SUNPRO_CC
     #include <stdlib.h>
     #include <string.h>
+    #include <ucontext.h>
 #else /* !__SUNPRO_CC */
     #include <cstdlib>
 #if !TBB_USE_EXCEPTIONS && _MSC_VER
@@ -89,51 +90,120 @@ int TestMain ();
 
 #if _WIN32||_WIN64
     #include "tbb/machine/windows_api.h"
-#if _XBOX
-    #undef HARNESS_NO_PARSE_COMMAND_LINE
-    #define HARNESS_NO_PARSE_COMMAND_LINE 1
-#endif
-#if __TBB_WIN8UI_SUPPORT
-#include <thread>
-#endif
+    #if _WIN32_WINNT > 0x0501 && _MSC_VER
+        #include <dbghelp.h>
+        #pragma comment (lib, "dbghelp.lib")
+    #endif
+    #if _XBOX
+        #undef HARNESS_NO_PARSE_COMMAND_LINE
+        #define HARNESS_NO_PARSE_COMMAND_LINE 1
+    #endif
+    #if __TBB_WIN8UI_SUPPORT
+        #include <thread>
+    #endif
+    #if _MSC_VER
+        #include <crtdbg.h>
+    #endif
     #include <process.h>
 #else
     #include <pthread.h>
 #endif
+
 #if __linux__
     #include <sys/utsname.h> /* for uname */
     #include <errno.h>       /* for use in LinuxKernelVersion() */
+    #include <features.h>
+#endif
+// at least GLIBC 2.1 or OSX 10.5
+#if __GLIBC__>2 || ( __GLIBC__==2 && __GLIBC_MINOR__ >= 1) || __APPLE__
+    #include <execinfo.h> /*backtrace*/
+    #define BACKTRACE_FUNCTION_AVAILABLE 1
 #endif
 
 #include "harness_report.h"
 
+#if HARNESS_USE_RUNTIME_LOADER
+    #define TBB_PREVIEW_RUNTIME_LOADER 1
+    #include "tbb/runtime_loader.h"
+    static char const * _path[] = { ".", NULL };
+    static tbb::runtime_loader _runtime_loader( _path );
+#endif // HARNESS_USE_RUNTIME_LOADER
+
 #if !HARNESS_NO_ASSERT
 
 #include "harness_assert.h"
+#if TEST_USES_TBB
+#include <tbb/tbb_stddef.h> /*set_assertion_handler*/
+
+struct InitReporter {
+    InitReporter() {
+#if TBB_USE_ASSERT
+        tbb::set_assertion_handler(ReportError);
+#endif
+        ASSERT_WARNING(TBB_INTERFACE_VERSION <= tbb::TBB_runtime_interface_version(), "runtime version mismatch");
+    }
+};
+static InitReporter InitReportError;
+#endif
 
 typedef void (*test_error_extra_t)(void);
 static test_error_extra_t ErrorExtraCall;
 //! Set additional handler to process failed assertions
 void SetHarnessErrorProcessing( test_error_extra_t extra_call ) {
     ErrorExtraCall = extra_call;
-    // TODO: add tbb::set_assertion_handler(ReportError);
 }
 
 //! Reports errors issued by failed assertions
 void ReportError( const char* filename, int line, const char* expression, const char * message ) {
+#if BACKTRACE_FUNCTION_AVAILABLE
+    const int sz = 100; // max number of frames to capture
+    void *buff[sz];
+    int n = backtrace(buff, sz);
+    REPORT("Call stack info (%d):\n", n);
+    backtrace_symbols_fd(buff, n, fileno(stdout));
+#elif __SUNPRO_CC
+    REPORT("Call stack info:\n");
+    printstack(fileno(stdout));
+#elif _WIN32_WINNT > 0x0501 && _MSC_VER && !__TBB_WIN8UI_SUPPORT
+    const int sz = 62; // XP limitation for number of frames
+    void *buff[sz];
+    int n = CaptureStackBackTrace(0, sz, buff, NULL);
+    REPORT("Call stack info (%d):\n", n);
+    static LONG once = 0;
+    if( !InterlockedExchange(&once, 1) )
+        SymInitialize(GetCurrentProcess(), NULL, TRUE);
+    const int len = 255; // just some reasonable string buffer size
+    union { SYMBOL_INFO sym; char pad[sizeof(SYMBOL_INFO)+len]; };
+    sym.MaxNameLen = len;
+    sym.SizeOfStruct = sizeof( SYMBOL_INFO );
+    DWORD64 offset;
+    for(int i = 1; i < n; i++) { // skip current frame
+        if(!SymFromAddr( GetCurrentProcess(), DWORD64(buff[i]), &offset, &sym )) {
+            sym.Address = ULONG64(buff[i]); offset = 0; sym.Name[0] = 0;
+        }
+        REPORT("[%d] %016I64LX+%04I64LX: %s\n", i, sym.Address, offset, sym.Name); //TODO: print module name
+    }
+#endif /*BACKTRACE_FUNCTION_AVAILABLE*/
+
 #if __TBB_ICL_11_1_CODE_GEN_BROKEN
     printf("%s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
 #else
     REPORT_FATAL_ERROR("%s:%d, assertion %s: %s\n", filename, line, expression, message ? message : "failed" );
 #endif
+
     if( ErrorExtraCall )
         (*ErrorExtraCall)();
+    fflush(stdout); fflush(stderr);
 #if HARNESS_TERMINATE_ON_ASSERT
     TerminateProcess(GetCurrentProcess(), 1);
 #elif HARNESS_EXIT_ON_ASSERT
     exit(1);
 #elif HARNESS_CONTINUE_ON_ASSERT
     // continue testing
+#elif _MSC_VER && _DEBUG
+    // aligned with tbb_assert_impl.h behavior
+    if(1 == _CrtDbgReport(_CRT_ASSERT, filename, line, NULL, "%s\r\n%s", expression, message?message:""))
+        _CrtDbgBreak();
 #else
     abort();
 #endif /* HARNESS_EXIT_ON_ASSERT */
@@ -239,13 +309,6 @@ static void ParseCommandLine( int argc, char* argv[] ) {
 }
 #endif /* HARNESS_NO_PARSE_COMMAND_LINE */
 
-#if HARNESS_USE_PROXY
-    #define TBB_PREVIEW_RUNTIME_LOADER 1
-    #include "tbb/runtime_loader.h"
-    static char const * _path[] = { ".", NULL };
-    static tbb::runtime_loader _runtime_loader( _path );
-#endif // HARNESS_USE_PROXY
-
 #if !HARNESS_CUSTOM_MAIN
 
 #if __TBB_MPI_INTEROP
@@ -259,13 +322,13 @@ HARNESS_EXPORT
 #if HARNESS_NO_PARSE_COMMAND_LINE
 int main() {
 #if __TBB_MPI_INTEROP
-    MPI_Init(NULL,NULL); 
+    MPI_Init(NULL,NULL);
 #endif
 #else
 int main(int argc, char* argv[]) {
     ParseCommandLine( argc, argv );
 #if __TBB_MPI_INTEROP
-    MPI_Init(&argc,&argv); 
+    MPI_Init(&argc,&argv);
 #endif
 #endif
 #if __TBB_MPI_INTEROP
@@ -274,8 +337,8 @@ int main(int argc, char* argv[]) {
     // Master process receives this info and print it in verbose mode
     int rank, size, myrank;
     MPI_Status status;
-    MPI_Comm_size(MPI_COMM_WORLD,&size); 
-    MPI_Comm_rank(MPI_COMM_WORLD,&myrank); 
+    MPI_Comm_size(MPI_COMM_WORLD,&size);
+    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
     if (myrank == 0) {
 #if !HARNESS_NO_PARSE_COMMAND_LINE
         REMARK("Hello mpi world. I am %d of %d\n", myrank, size);
@@ -547,9 +610,9 @@ public:
 #endif /* !HARNESS_NO_ASSERT */
 
 #if _WIN32 || _WIN64
-    void Sleep ( int ms ) { 
+    void Sleep ( int ms ) {
 #if !__TBB_WIN8UI_SUPPORT
-        ::Sleep(ms); 
+        ::Sleep(ms);
 #else
          std::chrono::milliseconds sleep_time( ms );
          std::this_thread::sleep_for( sleep_time );
diff --git a/src/test/harness_allocator.h b/src/test/harness_allocator.h
index f36c139..6ba30c8 100644
--- a/src/test/harness_allocator.h
+++ b/src/test/harness_allocator.h
@@ -27,7 +27,7 @@
 */
 
 // Declarations for simple estimate of the memory being used by a program.
-// Not yet implemented for Mac.
+// Not yet implemented for OS X*.
 // This header is an optional part of the test harness.
 // It assumes that "harness_assert.h" has already been included.
 
diff --git a/src/test/harness_concurrency.h b/src/test/harness_concurrency.h
new file mode 100644
index 0000000..63dc5d2
--- /dev/null
+++ b/src/test/harness_concurrency.h
@@ -0,0 +1,113 @@
+/*
+    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef tbb_tests_harness_concurrency_H
+#define tbb_tests_harness_concurrency_H
+
+#if _WIN32||_WIN64
+#include "tbb/machine/windows_api.h"
+#elif __linux__
+#include <unistd.h>
+#include <sys/sysinfo.h>
+#include <string.h>
+#include <sched.h>
+#elif __FreeBSD__
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/param.h>  // Required by <sys/cpuset.h>
+#include <sys/cpuset.h>
+#endif
+
+#include <limits.h>
+
+namespace Harness {
+    static int maxProcs = 0;
+    static int GetMaxProcs() {
+        if ( !maxProcs ) {
+#if _WIN32||_WIN64
+            SYSTEM_INFO si;
+            GetNativeSystemInfo(&si);
+            maxProcs = si.dwNumberOfProcessors;
+#elif __linux__
+            maxProcs = get_nprocs();
+#else /* __FreeBSD__ */
+            maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+        }
+        return maxProcs;
+    }
+
+    int LimitNumberOfThreads(int max_threads) {
+        ASSERT( max_threads >= 1 , "The limited number of threads should be positive." );
+        maxProcs = GetMaxProcs();
+        if ( maxProcs < max_threads )
+            // Suppose that process mask is not set so the number of available threads equals maxProcs
+            return maxProcs;
+
+#if _WIN32||_WIN64
+        ASSERT( max_threads <= 64 , "LimitNumberOfThreads doesn't support max_threads to be more than 64 on Windows." ); 
+        DWORD_PTR mask = 1;
+        for ( int i = 1; i < max_threads; ++i )
+            mask |= mask << 1;
+        bool err = !SetProcessAffinityMask( GetCurrentProcess(), mask );
+#else /* !WIN */
+#if __linux__
+        typedef cpu_set_t mask_t;
+#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
+#define setaffinity(mask) sched_setaffinity(0 /*get the mask of the calling thread*/, sizeof(mask_t), &mask)
+#else
+#define setaffinity(mask) sched_setaffinity(getpid(), sizeof(mask_t), &mask)
+#endif
+#else /* __FreeBSD__ */
+        typedef cpuset_t mask_t;
+#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
+#define setaffinity(mask) cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask_t), &mask)
+#else
+#define setaffinity(mask) cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(mask_t), &mask)
+#endif
+#endif /* __FreeBSD__ */
+        mask_t newMask;
+        CPU_ZERO(&newMask);
+
+        int maskSize = (int)sizeof(mask_t) * CHAR_BIT;
+        ASSERT_WARNING( maskSize >= maxProcs, "The mask size doesn't seem to be big enough to call setaffinity. The call may return an error." );
+
+        ASSERT( max_threads <= (int)sizeof(mask_t) * CHAR_BIT , "The mask size is not enough to set the requested number of threads." );
+        for ( int i = 0; i < max_threads; ++i )
+            CPU_SET( i, &newMask );
+        int err = setaffinity( newMask );
+#endif /* !WIN */
+        ASSERT( !err, "Setting process affinity failed" );
+
+        return max_threads;
+    }
+
+} // namespace Harness
+
+#endif /* tbb_tests_harness_concurrency_H */
diff --git a/src/test/harness_defs.h b/src/test/harness_defs.h
index b9b2965..045a3f4 100644
--- a/src/test/harness_defs.h
+++ b/src/test/harness_defs.h
@@ -55,6 +55,9 @@
   #define TBB_USE_ICC_BUILTINS         ( __TBB_TEST_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT )
 #endif
 
+//ICC has a bug: it makes wrong assumptions about modifications made via an atomic pointer
+#define __TBB_ICC_BUILTIN_ATOMICS_POINTER_ALIASING_BROKEN (TBB_USE_ICC_BUILTINS &&  __INTEL_COMPILER < 1400 && __INTEL_COMPILER > 1200)
+
 #if (_WIN32 && !__TBB_WIN8UI_SUPPORT) || (__linux__ && !__ANDROID__) || __FreeBSD_version >= 701000
 #define __TBB_TEST_SKIP_AFFINITY 0
 #else
@@ -88,12 +91,13 @@
   #define __TBB_PVALLOC_PRESENT 1
 #endif
 
+//The implementation of C++11 std::placeholders in libstdc++ shipped with gcc prior to 4.5 reveals a bug in Intel Compiler 13, causing "multiple definition" link errors.
+#define __TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN ((__INTEL_COMPILER == 1300 || __INTEL_COMPILER == 1310 )&& __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION < 40500)
+
 #if __GNUC__ && __ANDROID__
   #define __TBB_EXCEPTION_TYPE_INFO_BROKEN ( __TBB_GCC_VERSION < 40600 )
 #elif _MSC_VER
   #define __TBB_EXCEPTION_TYPE_INFO_BROKEN ( _MSC_VER < 1400 )
-#elif  __clang__ //TODO: recheck on different clang versions
-  #define __TBB_EXCEPTION_TYPE_INFO_BROKEN 1
 #else
   #define __TBB_EXCEPTION_TYPE_INFO_BROKEN 0
 #endif
diff --git a/src/test/harness_graph.h b/src/test/harness_graph.h
index 5c71626..b16780c 100644
--- a/src/test/harness_graph.h
+++ b/src/test/harness_graph.h
@@ -135,7 +135,7 @@ struct harness_graph_executor {
     static size_t max_executors;
 
     static inline OutputType func( InputType v ) {
-        size_t c; // Declaration separate from initialization to avoid ICC internal error on IA-64
+        size_t c; // Declaration separate from initialization to avoid ICC internal error on IA-64 architecture
         c = current_executors.fetch_and_increment();
         ASSERT( max_executors == 0 || c <= max_executors, NULL ); 
         ++execute_count;
@@ -185,7 +185,7 @@ struct harness_graph_multifunction_executor {
 
 
     static inline void func( const InputType &v, ports_type &p ) {
-        size_t c; // Declaration separate from initialization to avoid ICC internal error on IA-64
+        size_t c; // Declaration separate from initialization to avoid ICC internal error on IA-64 architecture
         c = current_executors.fetch_and_increment();
         ASSERT( max_executors == 0 || c <= max_executors, NULL ); 
         ASSERT(tbb::flow::tuple_size<OutputTuple>::value == 1, NULL);
diff --git a/src/test/harness_inject_scheduler.h b/src/test/harness_inject_scheduler.h
index 747664e..16a6ebf 100644
--- a/src/test/harness_inject_scheduler.h
+++ b/src/test/harness_inject_scheduler.h
@@ -67,4 +67,16 @@
 #endif
 #include "../rml/client/rml_tbb.cpp"
 
+#if HARNESS_USE_RUNTIME_LOADER
+#undef HARNESS_USE_RUNTIME_LOADER
+#include "harness.h"
+
+int TestMain () {
+    // Tests that directly include sources make no sense in runtime loader testing mode.
+    return Harness::Skipped;
+}
+// Renaming the TestMain function avoids conditional compilation around the same function in the test file
+#define TestMain TestMainSkipped
+#endif
+
 #endif /* harness_inject_scheduler_H */
diff --git a/src/test/harness_memory.h b/src/test/harness_memory.h
index f51d13d..db116df 100644
--- a/src/test/harness_memory.h
+++ b/src/test/harness_memory.h
@@ -27,7 +27,7 @@
 */
 
 // Declarations for simple estimate of the memory being used by a program.
-// Not yet implemented for Mac.
+// Not yet implemented for OS X*.
 // This header is an optional part of the test harness.
 // It assumes that "harness_assert.h" has already been included.
 
diff --git a/src/test/test_atomic.cpp b/src/test/test_atomic.cpp
index 42205b1..59ab109 100644
--- a/src/test/test_atomic.cpp
+++ b/src/test/test_atomic.cpp
@@ -632,9 +632,6 @@ void TestAtomicInteger( const char* name ) {
 namespace test_indirection_helpers {
     template<typename T>
     struct Foo {
-        //this constructor is needed to workaround ICC intrinsics port (compiler ?)bug, firing assertion below
-        //TODO: move this under #if
-        Foo(): x(), y(), z() {}
         T x, y, z;
     };
 }
@@ -656,22 +653,13 @@ void TestIndirection() {
         (*pointer).z = value2;
         T result1 = (*pointer).y;
         T result2 = pointer->z;
-        //TODO: investigate (fill a bug?)assertion failure below for ICC (12.1.2?) intrinsic port for sizes of 4,6,7
-        //and remove default constructor for test_indirection_helpers::Foo
-        #if !TBB_USE_ICC_BUILTINS
-            ASSERT( memcmp(&value1,&result1,sizeof(T))==0, NULL );
-            ASSERT( memcmp(&value2,&result2,sizeof(T))==0, NULL );
-        #elif __APPLE__
-            //ICC 12.1 generates incorrect code that crashes when icc builtins port is used
-        #else
-            if (    (memcmp(&value1,&result1,sizeof(T))!=0)
-                 || (memcmp(&value2,&result2,sizeof(T))!=0))
-            {
-                REMARK_ONCE("Known Issue: ICC builtins port seems to generate wrong code of atomic::operator* "
-                        "and operator*-> for some types \n");
-            }
-        #endif
+        ASSERT( memcmp(&value1,&result1,sizeof(T))==0, NULL );
+        ASSERT( memcmp(&value2,&result2,sizeof(T))==0, NULL );
     }
+    #if __TBB_ICC_BUILTIN_ATOMICS_POINTER_ALIASING_BROKEN
+        //prevent ICC compiler from assuming 'item' is unused and reusing its storage
+        item.x = item.y=item.z;
+    #endif
 }
 
 //! Test atomic<T*>
diff --git a/src/test/test_cilk_dynamic_load.cpp b/src/test/test_cilk_dynamic_load.cpp
index 645d82a..6b393b3 100644
--- a/src/test/test_cilk_dynamic_load.cpp
+++ b/src/test/test_cilk_dynamic_load.cpp
@@ -35,10 +35,10 @@
 // The compiler does not add "-lcilkrts" linker option on some linux systems
 #define CILK_LINKAGE_BROKEN  (__linux__ && __GNUC__<4 && __INTEL_COMPILER_BUILD_DATE <= 20110427)
 // Currently, the interop doesn't support the situation:
-//1) TBB is outermost;
-//2)   Cilk, and it should be dynamically loaded with dlopen/LoadLibrary (possibly via a 3rd party module);
-//3)     TBB again;
-//4)       Cilk again.
+//1) Intel(R) Threading Building Blocks (Intel(R) TBB) is outermost;
+//2)   Intel(R) Cilk(TM) Plus, and it should be dynamically loaded with dlopen/LoadLibrary (possibly via a 3rd party module);
+//3)     Intel(R) TBB again;
+//4)       Intel(R) Cilk(TM) Plus again.
 #define HEAVY_NESTED_INTEROP_SUPPORT ( __INTEL_COMPILER_BUILD_DATE < 20110427 )
 
 #if __TBB_CILK_INTEROP && CILK_SYMBOLS_VISIBLE && !CILK_LINKAGE_BROKEN && HEAVY_NESTED_INTEROP_SUPPORT
diff --git a/src/test/test_concurrent_hash_map.cpp b/src/test/test_concurrent_hash_map.cpp
index 9bc65db..14b0965 100644
--- a/src/test/test_concurrent_hash_map.cpp
+++ b/src/test/test_concurrent_hash_map.cpp
@@ -77,12 +77,12 @@ namespace tbb {
     };
 }
 
-#if HARNESS_USE_PROXY
+#if HARNESS_USE_RUNTIME_LOADER
     #define TBB_PREVIEW_RUNTIME_LOADER 1
     #include "tbb/runtime_loader.h"
     static char const * test_path[] = { ".", NULL };
     static tbb::runtime_loader test_runtime_loader( test_path );
-#endif // HARNESS_USE_PROXY
+#endif // HARNESS_USE_RUNTIME_LOADER
 
 tbb::concurrent_hash_map<UserDefinedKeyType,int> TestInstantiationWithUserDefinedKeyType;
 
diff --git a/src/test/test_concurrent_priority_queue.cpp b/src/test/test_concurrent_priority_queue.cpp
index 7d82765..b91ad9c 100644
--- a/src/test/test_concurrent_priority_queue.cpp
+++ b/src/test/test_concurrent_priority_queue.cpp
@@ -29,8 +29,10 @@
 #include "harness_defs.h"
 #include "tbb/concurrent_priority_queue.h"
 #include "tbb/atomic.h"
+#include "tbb/blocked_range.h"
 #include "harness.h"
 #include <functional>
+#include <algorithm>
 #include <vector>
 
 #if _MSC_VER==1500 && !__INTEL_COMPILER
@@ -154,6 +156,53 @@ public:
     }
 };
 
+namespace equality_comparison_helpers {
+    struct to_vector{
+        template <typename element_type>
+        std::vector<element_type> operator()(tbb::concurrent_priority_queue<element_type> const& source) const{
+            tbb::concurrent_priority_queue<element_type>  cpq((source));
+            std::vector<element_type> v; v.reserve(cpq.size());
+            element_type element;
+            while (cpq.try_pop(element)){ v.push_back(element);}
+            std::reverse(v.begin(),v.end());
+            return v;
+        }
+    };
+}
+//TODO: make CPQ more testable instead of hacking ad-hoc operator ==
+//operator == is required for __TBB_TEST_INIT_LIST_SUITE
+template <typename element_type>
+bool operator==(tbb::concurrent_priority_queue<element_type> const& lhs, tbb::concurrent_priority_queue<element_type> const& rhs){
+    using equality_comparison_helpers::to_vector;
+    return to_vector()(lhs) == to_vector()(rhs);
+}
+
+template <typename element_type, typename  range>
+bool operator==(tbb::concurrent_priority_queue<element_type> const& lhs, range const & rhs ){
+    using equality_comparison_helpers::to_vector;
+    return to_vector()(lhs) == std::vector<element_type>(rhs.begin(),rhs.end());
+}
+
+//TODO: move this to harness
+template<typename T, size_t N>
+tbb::blocked_range<T*> make_blocked_range( T(& array)[N]){ return tbb::blocked_range<T*>(array, array + N);}
+
+
+void TestToVector(){
+    using equality_comparison_helpers::to_vector;
+    int array[] = {1,5,6,8,4,7};
+    tbb::blocked_range<int *> range =  make_blocked_range(array);
+    std::vector<int> source(range.begin(),range.end());
+    tbb::concurrent_priority_queue<int> q(source.begin(),source.end());
+    std::vector<int> from_cpq = to_vector()(q);
+    std::sort(source.begin(),source.end());
+    ASSERT(source == from_cpq,"quality_comparison_helpers::to_vector incorrectly copied items from CPQ?");
+}
+
+void TestHelpers(){
+    TestToVector();
+}
+
 void TestConstructorsDestructorsAccessors() {
     std::vector<int> v;
     std::allocator<int> a;
@@ -204,12 +253,15 @@ void TestConstructorsDestructorsAccessors() {
     REMARK("Iterator filler constructor complete.\n");
     ASSERT(q->size()==42, "FAILED vector/size test.");
     ASSERT(!q->empty(), "FAILED vector/empty test.");
+    ASSERT(*q == v, "FAILED vector/equality test.");
 
     REMARK("Testing copy constructor.\n");
     qo = new concurrent_priority_queue<int, std::less<int> >(*q);
     REMARK("Copy constructor complete.\n");
-    ASSERT(qo->size()==42, "FAILED vector/size test.");
-    ASSERT(!qo->empty(), "FAILED vector/empty test.");
+    ASSERT(qo->size()==42, "FAILED cpq/size test.");
+    ASSERT(!qo->empty(), "FAILED cpq/empty test.");
+    ASSERT(*q == *qo, "FAILED cpq/equality test.");
+
     REMARK("Testing destructor.\n");
     delete q;
     delete qo;
@@ -217,20 +269,30 @@ void TestConstructorsDestructorsAccessors() {
 }
 
 void TestAssignmentClearSwap() {
+    typedef concurrent_priority_queue<int, std::less<int> > cpq_type;
     std::vector<int> v;
-    concurrent_priority_queue<int, std::less<int> > *q, *qo;
+    cpq_type *q, *qo;
     int e;
 
     for (int i=0; i<42; ++i)
         v.push_back(i);
-    q = new concurrent_priority_queue<int, std::less<int> >(v.begin(), v.end());
-    qo = new concurrent_priority_queue<int, std::less<int> >();
+    q = new cpq_type(v.begin(), v.end());
+    qo = new cpq_type();
 
     REMARK("Testing assignment (1).\n");
     *qo = *q; 
     REMARK("Assignment complete.\n");
     ASSERT(qo->size()==42, "FAILED assignment/size test.");
     ASSERT(!qo->empty(), "FAILED assignment/empty test.");
+    ASSERT(*qo == v,"FAILED assignment/equality test");
+
+    cpq_type assigned_q;
+    REMARK("Testing assign(begin,end) (2).\n");
+    assigned_q.assign(v.begin(), v.end());
+    REMARK("Assignment complete.\n");
+    ASSERT(assigned_q.size()==42, "FAILED assignment/size test.");
+    ASSERT(!assigned_q.empty(), "FAILED assignment/empty test.");
+    ASSERT(assigned_q == v,"FAILED assignment/equality test");
 
     REMARK("Testing clear.\n");
     q->clear();
@@ -241,7 +303,7 @@ void TestAssignmentClearSwap() {
     for (size_t i=0; i<5; ++i)
         (void) qo->try_pop(e);
 
-    REMARK("Testing assignment (2).\n");
+    REMARK("Testing assignment (3).\n");
     *q = *qo;
     REMARK("Assignment complete.\n");
     ASSERT(q->size()==37, "FAILED assignment/size test.");
@@ -461,9 +523,33 @@ void TestCpqOnNThreads(int nThreads) {
 #endif
 }
 
+#if __TBB_INITIALIZER_LISTS_PRESENT
+#include "test_initializer_list.h"
+
+#define __TBB_CPQ_TEST_INIT_SEQ {1,2,3,4,5}
+__TBB_TEST_INIT_LIST_SUITE(TestInitListIml,tbb::concurrent_priority_queue,char,__TBB_CPQ_TEST_INIT_SEQ)
+#undef __TBB_CPQ_TEST_INIT_SEQ
+
+#define __TBB_CPQ_TEST_EMPTY_INIT_SEQ {}
+__TBB_TEST_INIT_LIST_SUITE(TestEmptyInitListIml,tbb::concurrent_priority_queue,int,__TBB_CPQ_TEST_EMPTY_INIT_SEQ)
+#undef __TBB_CPQ_TEST_EMPTY_INIT_SEQ
+
+void TestInitList(){
+    REMARK("testing initializer_list methods \n");
+    TestEmptyInitListIml();
+    TestInitListIml();
+}
+#endif //if __TBB_INITIALIZER_LISTS_PRESENT
+
 int TestMain() {
     if (MinThread < 1)
         MinThread = 1;
+
+    TestHelpers();
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    TestInitList();
+#endif
+
     for (int p = MinThread; p <= MaxThread; ++p) {
         REMARK("Testing on %d threads.\n", p);
         TestCpqOnNThreads(p);
diff --git a/src/test/test_concurrent_unordered.cpp b/src/test/test_concurrent_unordered.cpp
index c72a5fe..adb188a 100644
--- a/src/test/test_concurrent_unordered.cpp
+++ b/src/test/test_concurrent_unordered.cpp
@@ -27,6 +27,8 @@
 */
 
 /* Some tests in this source file are based on PPL tests provided by Microsoft. */
+#include "harness_defs.h"
+#if !(__TBB_TEST_SECONDARY && __TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN)
 
 #define __TBB_EXTRA_DEBUG 1
 #include "tbb/concurrent_unordered_map.h"
@@ -742,8 +744,13 @@ int TestMain () {
     { Check<MyCheckedMultiMap::value_type> checkit; test_concurrent<MyCheckedMultiMap>("concurrent unordered MultiMap (checked)"); }
 
     test_initialization_time_operations();
-    test_initialization_time_operations_external();
+    #if !__TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN
+        test_initialization_time_operations_external();
+    #else
+        REPORT("Known issue: global objects initialization time tests skipped.\n");
+    #endif //!__TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN
 
     return Harness::Done;
 }
 #endif //#if !__TBB_TEST_SECONDARY
+#endif //!(__TBB_TEST_SECONDARY && __TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN)
diff --git a/src/test/test_concurrent_vector.cpp b/src/test/test_concurrent_vector.cpp
index ae1191d..d9d381b 100644
--- a/src/test/test_concurrent_vector.cpp
+++ b/src/test/test_concurrent_vector.cpp
@@ -646,40 +646,7 @@ void TestArrayLength(){
 }
 
 #if __TBB_INITIALIZER_LISTS_PRESENT
-//TODO: move init list test set to separate header
-//TODO: split into set of tests
-//TODO: add test for no leaks, and correct element lifetime
-//the need for macro comes from desire to test different scenarios where initializer sequence is compile time constant
-#define __TBB_TEST_INIT_LIST_SUITE(FUNC_NAME, CONTAINER, ELEMENT_TYPE, INIT_SEQ)                                                                  \
-void FUNC_NAME(){                                                                                                                                 \
-    typedef ELEMENT_TYPE element_type;                                                                                                            \
-    typedef CONTAINER<element_type> container_type;                                                                                               \
-    element_type test_seq[] = INIT_SEQ;                                                                                                           \
-    container_type expected(test_seq,test_seq + array_length(test_seq));                                                                          \
-                                                                                                                                                  \
-    /*test for explicit contructor call*/                                                                                                         \
-    container_type vd (INIT_SEQ,tbb::cache_aligned_allocator<int>());                                                                             \
-    ASSERT(vd == expected,"initialization via explicit constructor call with init list failed");                                                  \
-    /*test for explicit contructor call with std::initializer_list*/                                                                              \
-                                                                                                                                                  \
-    std::initializer_list<element_type> init_list = INIT_SEQ;                                                                                     \
-    container_type v1 (init_list,tbb::cache_aligned_allocator<int>());                                                                            \
-    ASSERT(v1 == expected,"initialization via explicit constructor call with std::initializer_list failed");                                      \
-                                                                                                                                                  \
-    /*implicit constructor call test*/                                                                                                            \
-    container_type v = INIT_SEQ;                                                                                                                  \
-    ASSERT(v == expected,"init list constructor failed");                                                                                         \
-                                                                                                                                                  \
-    /*assignment operator test*/                                                                                                                  \
-    /*TODO: count created and destroyed injects to assert that no extra copy of vector was created implicitly*/                                   \
-    container_type va;                                                                                                                            \
-    va = INIT_SEQ;                                                                                                                                \
-    ASSERT(va == expected,"init list operator= failed");                                                                                          \
-                                                                                                                                                  \
-    container_type vae;                                                                                                                           \
-    vae.assign(INIT_SEQ);                                                                                                                          \
-    ASSERT(vae == expected,"init list assign failed");                                                                                             \
-}
+#include "test_initializer_list.h"
 
 #define __TBB_CVECTOR_TEST_INIT_SEQ {1,2,3,4,5}
 __TBB_TEST_INIT_LIST_SUITE(TestInitListIml,tbb::concurrent_vector,char,__TBB_CVECTOR_TEST_INIT_SEQ )
diff --git a/src/test/test_fast_random.cpp b/src/test/test_fast_random.cpp
index ef6e989..bc2fc67 100644
--- a/src/test/test_fast_random.cpp
+++ b/src/test/test_fast_random.cpp
@@ -33,27 +33,14 @@
     by no more than AcceptableDeviation percent.
 **/
 
-#if HARNESS_USE_PROXY
-
-// The test includes injects scheduler directly, so skip it when proxy tested.
-
-#undef HARNESS_USE_PROXY
-#include "harness.h"
-
-int TestMain () {
-    return Harness::Skipped;
-}
-
-#else // HARNESS_USE_PROXY
+#define HARNESS_DEFAULT_MIN_THREADS 2
+#define HARNESS_DEFAULT_MAX_THREADS 32
 
 #include <algorithm> // include it first to avoid error on define below
 #define private public
 #include "harness_inject_scheduler.h"
 #undef private
 
-#define HARNESS_DEFAULT_MIN_THREADS 2
-#define HARNESS_DEFAULT_MAX_THREADS 32
-
 #define TEST_TOTAL_SEQUENCE 0
 
 #include "harness.h"
@@ -106,7 +93,7 @@ struct CheckDistributionBody {
             // Seed value mimics the one used by the TBB task scheduler
             void* seed = (char*)&curHits + i * 16;
             tbb::internal::FastRandom random( seed );
-            // According to Section 3.2.1.2 of Volume 2 of Knuth�s Art of Computer Programming
+            // According to Section 3.2.1.2 of Volume 2 of Knuth's Art of Computer Programming
             // the following conditions must be hold for m=2^32:
             ASSERT((random.c&1)!=0, "c is relatively prime to m");
             ASSERT((random.a-1)%4==0, "a-1 is a multiple of p, for every prime p dividing m."
@@ -221,4 +208,3 @@ int TestMain () {
     }
     return Harness::Done;
 }
-#endif // HARNESS_USE_PROXY
diff --git a/src/test/test_hw_concurrency.cpp b/src/test/test_hw_concurrency.cpp
index ffbd0b2..42a634b 100644
--- a/src/test/test_hw_concurrency.cpp
+++ b/src/test/test_hw_concurrency.cpp
@@ -37,23 +37,7 @@ int TestMain() {
 #else /* affinity mask can be set and used by TBB */
 
 #include "harness.h"
-
-#include <limits.h>
-
-#if _WIN32||_WIN64
-#include "tbb/machine/windows_api.h"
-#elif __linux__
-#include <unistd.h>
-#include <sys/sysinfo.h>
-#include <string.h>
-#include <sched.h>
-#elif __FreeBSD__
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/param.h>  // Required by <sys/cpuset.h>
-#include <sys/cpuset.h>
-#endif
+#include "harness_concurrency.h"
 
 #include "tbb/task_scheduler_init.h"
 #include "tbb/tbb_thread.h"
@@ -66,44 +50,13 @@ int TestMain() {
 tbb::enumerable_thread_specific<std::size_t> ets;
 
 int TestMain () {
-#if _WIN32||_WIN64
-    SYSTEM_INFO si;
-    GetNativeSystemInfo(&si);
-    if ( si.dwNumberOfProcessors < 2 )
-        return Harness::Skipped;
-    int availableProcs = (int)si.dwNumberOfProcessors / 2;
-    DWORD_PTR mask = 1;
-    for ( int i = 1; i < availableProcs; ++i )
-        mask |= mask << 1;
-    bool err = !SetProcessAffinityMask( GetCurrentProcess(), mask );
-#else /* !WIN */
-#if __linux__
-    int maxProcs = get_nprocs();
-    typedef cpu_set_t mask_t;
-#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
-    #define setaffinity(mask) sched_setaffinity(0 /*get the mask of the calling thread*/, sizeof(mask_t), &mask)
-#else
-    #define setaffinity(mask) sched_setaffinity(getpid(), sizeof(mask_t), &mask)
-#endif
-#else /* __FreeBSD__ */
-    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
-    typedef cpuset_t mask_t;
-#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
-    #define setaffinity(mask) cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask_t), &mask)
-#else
-    #define setaffinity(mask) cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(mask_t), &mask)
-#endif
-#endif /* __FreeBSD__ */
+    int maxProcs = Harness::GetMaxProcs();
+
     if ( maxProcs < 2 )
         return Harness::Skipped;
-    mask_t newMask;
-    CPU_ZERO(&newMask);
-    int availableProcs = min(maxProcs, (int)sizeof(mask_t) * CHAR_BIT) / 2;
-    for ( int i = 0; i < availableProcs; ++i )
-        CPU_SET( i, &newMask );
-    int err = setaffinity( newMask );
-#endif /* !WIN */
-    ASSERT( !err, "Setting process affinity failed" );
+
+    int availableProcs = maxProcs/2;
+    ASSERT( Harness::LimitNumberOfThreads( availableProcs ) == availableProcs, "LimitNumberOfThreads has not set the requested limitation." );
     ASSERT( tbb::task_scheduler_init::default_num_threads() == availableProcs, NULL );
     ASSERT( (int)tbb::tbb_thread::hardware_concurrency() == availableProcs, NULL );
     return Harness::Done;
diff --git a/src/test/test_initializer_list.h b/src/test/test_initializer_list.h
new file mode 100644
index 0000000..9b281fa
--- /dev/null
+++ b/src/test/test_initializer_list.h
@@ -0,0 +1,71 @@
+/*
+    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_test_initializer_list_H
+#define __TBB_test_initializer_list_H
+#include "tbb/tbb_config.h"
+
+
+#if __TBB_INITIALIZER_LISTS_PRESENT
+#include <initializer_list>
+//TODO: split into set of tests
+//TODO: add test for no leaks, and correct element lifetime
+//the need for macro comes from desire to test different scenarios where initializer sequence is compile time constant
+#define __TBB_TEST_INIT_LIST_SUITE(FUNC_NAME, CONTAINER, ELEMENT_TYPE, INIT_SEQ)                                                                  \
+void FUNC_NAME(){                                                                                                                                 \
+    typedef ELEMENT_TYPE element_type;                                                                                                            \
+    typedef CONTAINER<element_type> container_type;                                                                                               \
+    element_type test_seq[] = INIT_SEQ;                                                                                                           \
+    container_type expected(test_seq,test_seq + array_length(test_seq));                                                                          \
+                                                                                                                                                  \
+    /*test for explicit constructor call*/                                                                                                        \
+    container_type vd (INIT_SEQ,tbb::cache_aligned_allocator<int>());                                                                             \
+    ASSERT(vd == expected,"initialization via explicit constructor call with init list failed");                                                  \
+    /*test for explicit constructor call with std::initializer_list*/                                                                             \
+                                                                                                                                                  \
+    std::initializer_list<element_type> init_list = INIT_SEQ;                                                                                     \
+    container_type v1 (init_list,tbb::cache_aligned_allocator<int>());                                                                            \
+    ASSERT(v1 == expected,"initialization via explicit constructor call with std::initializer_list failed");                                      \
+                                                                                                                                                  \
+    /*implicit constructor call test*/                                                                                                            \
+    container_type v = INIT_SEQ;                                                                                                                  \
+    ASSERT(v == expected,"init list constructor failed");                                                                                         \
+                                                                                                                                                  \
+    /*assignment operator test*/                                                                                                                  \
+    /*TODO: count created and destroyed injects to assert that no extra copy of vector was created implicitly*/                                   \
+    container_type va;                                                                                                                            \
+    va = INIT_SEQ;                                                                                                                                \
+    ASSERT(va == expected,"init list operator= failed");                                                                                          \
+                                                                                                                                                  \
+    container_type vae;                                                                                                                           \
+    vae.assign(INIT_SEQ);                                                                                                                         \
+    ASSERT(vae == expected,"init list assign failed");                                                                                            \
+}                                                                                                                                                 \
+
+#endif //__TBB_INITIALIZER_LISTS_PRESENT
+#endif //__TBB_test_initializer_list_H
diff --git a/src/test/test_intrusive_list.cpp b/src/test/test_intrusive_list.cpp
index 9ca1db3..a8c3eb0 100644
--- a/src/test/test_intrusive_list.cpp
+++ b/src/test/test_intrusive_list.cpp
@@ -134,7 +134,7 @@ void TestListAssertions () {
     TRY_BAD_EXPR( il1.push_front(n1), "only one intrusive list" );
     il2.push_front(n2);
     TRY_BAD_EXPR( il1.remove(n3), "not in the list" );
-    tbb::set_assertion_handler( NULL );
+    tbb::set_assertion_handler( ReportError );
 #endif /* TRY_BAD_EXPR_ENABLED */
 }
 
diff --git a/src/test/test_malloc_compliance.cpp b/src/test/test_malloc_compliance.cpp
index 9a90aac..3f2366c 100644
--- a/src/test/test_malloc_compliance.cpp
+++ b/src/test/test_malloc_compliance.cpp
@@ -181,7 +181,7 @@ TestAlignedRealloc* Raligned_realloc;
 bool error_occurred = false;
 
 #if __APPLE__
-// Tests that use the variable are skipped on Mac OS* X
+// Tests that use the variable are skipped on OS X*
 #else
 static bool perProcessLimits = true;
 #endif
@@ -262,7 +262,7 @@ static void setSystemAllocs()
     Taligned_free=_aligned_free;
     Rposix_memalign=0;
 #elif  __APPLE__ || __sun || __ANDROID__ 
-// Mac OS* X, Solaris, and Android don't have posix_memalign
+// OS X*, Solaris, and Android don't have posix_memalign
     Raligned_malloc=0;
     Raligned_realloc=0;
     Taligned_free=0;
@@ -339,7 +339,7 @@ int main(int argc, char* argv[]) {
 #endif
     //-------------------------------------
 #if __APPLE__
-    /* Skip due to lack of memory limit enforcing under Mac OS X. */
+    /* Skip due to lack of memory limit enforcing under OS X*. */
 #else
     limitMem(200);
     ReallocParam();
@@ -1017,7 +1017,7 @@ void CMemTest::RunAllTests(int total_threads)
         InvariantDataRealloc(/*aligned=*/true);
     TestAlignedParameters();
 #if __APPLE__
-    REPORT("Known issue: some tests are skipped on Mac OS* X\n");
+    REPORT("Known issue: some tests are skipped on OS X*\n");
 #else
     UniquePointer();
     AddrArifm();
diff --git a/src/test/test_runtime_loader.cpp b/src/test/test_runtime_loader.cpp
index 55dbd5d..90bc7f8 100644
--- a/src/test/test_runtime_loader.cpp
+++ b/src/test/test_runtime_loader.cpp
@@ -204,7 +204,7 @@ int TestMain() {
                     // Check what?
                 }
 
-                // There was a problem on Linux, and still a problem on Mac OS X.
+                // There was a problem on Linux* OS, and still a problem on OS X*.
                 SAY( "Throw an exception." );
                 // Iterate thru all the ids first.
                 for ( int id = 1; id < tbb::internal::eid_max; ++ id ) {
diff --git a/src/test/test_task_assertions.cpp b/src/test/test_task_assertions.cpp
index 2bc2d82..20b4d62 100644
--- a/src/test/test_task_assertions.cpp
+++ b/src/test/test_task_assertions.cpp
@@ -26,19 +26,6 @@
     the GNU General Public License.
 */
 
-#if HARNESS_USE_PROXY
-
-// The test includes injects scheduler directly, so skip it when proxy tested.
-
-#undef HARNESS_USE_PROXY
-#include "harness.h"
-
-int TestMain () {
-    return Harness::Skipped;
-}
-
-#else // HARNESS_USE_PROXY
-
 // Test correctness of forceful TBB initialization before any dynamic initialization
 // of static objects inside the library took place.
 namespace tbb { 
@@ -55,9 +42,8 @@ struct StaticInitializationChecker {
 // Test that important assertions in class task fail as expected.
 //------------------------------------------------------------------------
 
-#include "harness_inject_scheduler.h"
-
 #define HARNESS_NO_PARSE_COMMAND_LINE 1
+#include "harness_inject_scheduler.h"
 #include "harness.h"
 #include "harness_bad_expr.h"
 
@@ -99,7 +85,7 @@ void TestTaskAssertions() {
     ASSERT( AbuseOneTaskRan==1, NULL );
     tbb::task::destroy(*AbusedTask);
     // Restore normal assertion handling
-    tbb::set_assertion_handler( NULL );
+    tbb::set_assertion_handler( ReportError );
 }
 
 int TestMain () {
@@ -114,5 +100,3 @@ int TestMain () {
 }
 
 #endif /* !TRY_BAD_EXPR_ENABLED */
-
-#endif // HARNESS_USE_PROXY
diff --git a/src/test/test_task_enqueue.cpp b/src/test/test_task_enqueue.cpp
index fcd262d..d59cbb8 100644
--- a/src/test/test_task_enqueue.cpp
+++ b/src/test/test_task_enqueue.cpp
@@ -120,7 +120,7 @@ public:
             else {
                 ++stall_count;
                 // no progress for at least 0.1 s; consider it dead.
-                ASSERT(stall_count < stall_threshold, "no progress on enqueued tasks; deadlock, or the machine is oversubsribed?");
+                ASSERT(stall_count < stall_threshold, "no progress on enqueued tasks; deadlock, or the machine is oversubscribed?");
             }
             if( progress_mask==all_progressed || progress_mask^last_progress_mask ) {
                 uneven_progress_count = 0;
diff --git a/src/test/test_task_group.cpp b/src/test/test_task_group.cpp
index 413f83b..4127dce 100644
--- a/src/test/test_task_group.cpp
+++ b/src/test/test_task_group.cpp
@@ -842,7 +842,15 @@ int TestMain () {
         TestEh2();
         TestStructuredWait();
         TestStructuredCancellation2<true>();
+        //this condition cannot be moved to harness_defs.h as the only way to detect std C++ library is to include something from it.
+        //TODO: recheck the condition with newer versions of clang/libc++
+#if (__clang__ && _LIBCPP_VERSION && __GXX_EXPERIMENTAL_CXX0X__)
+        //TODO:it seems that clang with libc++ in C++11 mode does not expect exception
+        //coming from destructor in the following test as it does not generate correct code for stack unwinding.
+        REPORT("Known issue: TestStructuredCancellation2<false> test is skipped.\n");
+#else
         TestStructuredCancellation2<false>();
+#endif
 #endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */
 #if !TBBTEST_USE_TBB
         s->Release();
diff --git a/src/test/test_task_leaks.cpp b/src/test/test_task_leaks.cpp
index 01c21a6..9966690 100644
--- a/src/test/test_task_leaks.cpp
+++ b/src/test/test_task_leaks.cpp
@@ -38,18 +38,7 @@
     switching producer thread, and the check is repeated.
 */
 
-#if HARNESS_USE_PROXY
-
-// The test includes injects scheduler directly, so skip it when proxy tested.
-
-#undef HARNESS_USE_PROXY
-#include "harness.h"
-
-int TestMain () {
-    return Harness::Skipped;
-}
-
-#else // HARNESS_USE_PROXY
+#define HARNESS_DEFAULT_MIN_THREADS -1
 
 #define  __TBB_COUNT_TASK_NODES 1
 #include "harness_inject_scheduler.h"
@@ -78,7 +67,6 @@ tbb::internal::scheduler* Producer;
 
 #include "tbb/task_scheduler_init.h"
 
-#define HARNESS_DEFAULT_MIN_THREADS -1
 #include "harness.h"
 
 using namespace tbb;
@@ -288,6 +276,3 @@ int TestMain () {
     TestTaskReclamation();
     return Harness::Done;
 }
-
-#endif  // HARNESS_USE_PROXY
-
diff --git a/src/test/test_task_priority.cpp b/src/test/test_task_priority.cpp
index 9cb2ae7..352fd3b 100644
--- a/src/test/test_task_priority.cpp
+++ b/src/test/test_task_priority.cpp
@@ -46,8 +46,8 @@
 
 const int NumIterations = 100;
 const int NumLeafTasks = 2;
-int MinBaseDepth = 9;
-int MaxBaseDepth = 11;
+int MinBaseDepth = 8;
+int MaxBaseDepth = 10;
 int BaseDepth = 0;
 
 const int NumTests = 8;
@@ -404,7 +404,7 @@ void TestPriorityAssertions () {
     tbb::task &t = *new( tbb::task::allocate_root() ) tbb::empty_task;
     TRY_BAD_EXPR( tbb::task::enqueue( t, bad_high_priority ), "Invalid priority level value" );
     // Restore normal assertion handling
-    tbb::set_assertion_handler( NULL );
+    tbb::set_assertion_handler( ReportError );
 #endif /* TRY_BAD_EXPR_ENABLED && __TBB_TASK_PRIORITY */
 }
 
@@ -454,7 +454,14 @@ void TestEnqueueOrder () {
 }
 #endif /* __TBB_TASK_PRIORITY */
 
+#if !__TBB_TEST_SKIP_AFFINITY
+#include "harness_concurrency.h"
+#endif
+
 int TestMain () {
+#if !__TBB_TEST_SKIP_AFFINITY
+    Harness::LimitNumberOfThreads( 16 );
+#endif
 #if !__TBB_TASK_PRIORITY
     REMARK( "Priorities disabled: Running as just yet another task scheduler test\n" );
 #else
@@ -464,6 +471,7 @@ int TestMain () {
     TestSimplePriorityOps(tbb::priority_low);
     TestSimplePriorityOps(tbb::priority_high);
     P = tbb::task_scheduler_init::default_num_threads();
+    REMARK( "The number of threads: %d\n", P );
     if ( P < 3 )
         return Harness::Skipped;
     TestPeriodicConcurrentActivities();
diff --git a/src/test/test_tbb_header.cpp b/src/test/test_tbb_header.cpp
index a2cbed4..2dc49ae 100644
--- a/src/test/test_tbb_header.cpp
+++ b/src/test/test_tbb_header.cpp
@@ -32,6 +32,8 @@
 
     Most of the checks happen at the compilation or link phases.
 **/
+#include "harness_defs.h"
+#if !(__TBB_TEST_SECONDARY && __TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN)
 
 #if _MSC_VER
 #pragma warning (disable : 4503)      // decorated name length exceeded, name was truncated
@@ -141,6 +143,9 @@ void secondary()
 int TestMain ()
 #endif
 {
+    #if __TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN
+        REPORT("Known issue: \"multiple definition\" linker error detection test skipped.\n");
+    #endif
     TestTypeDefinitionPresence2(aligned_space<int, 1> );
     TestTypeDefinitionPresence( atomic<int> );
     TestTypeDefinitionPresence( cache_aligned_allocator<int> );
@@ -217,3 +222,4 @@ int TestMain ()
     return Harness::Done;
 #endif
 }
+#endif //!(__TBB_TEST_SECONDARY && __TBB_CPP11_STD_PLACEHOLDERS_LINKAGE_BROKEN)
diff --git a/src/test/test_tbb_version.cpp b/src/test/test_tbb_version.cpp
index a9f7533..619118f 100644
--- a/src/test/test_tbb_version.cpp
+++ b/src/test/test_tbb_version.cpp
@@ -246,7 +246,7 @@ int main(int argc, char *argv[] ) {
 void initialize_strings_vector(std::vector <string_pair>* vector)
 {
     vector->push_back(string_pair("TBB: VERSION\t\t4.1", required));          // check TBB_VERSION
-    vector->push_back(string_pair("TBB: INTERFACE VERSION\t6104", required)); // check TBB_INTERFACE_VERSION
+    vector->push_back(string_pair("TBB: INTERFACE VERSION\t6105", required)); // check TBB_INTERFACE_VERSION
     vector->push_back(string_pair("TBB: BUILD_DATE", required));
     vector->push_back(string_pair("TBB: BUILD_HOST", required));
     vector->push_back(string_pair("TBB: BUILD_OS", required));

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/tbb.git



More information about the debian-science-commits mailing list