[libstxxl1] 01/02: Imported Upstream version 1.4.1

D Haley mycae-guest at moszumanska.debian.org
Thu May 14 21:56:01 UTC 2015


This is an automated email from the git hooks/post-receive script.

mycae-guest pushed a commit to branch master
in repository libstxxl1.

commit 7d8f358c9016b151cdb196379a6cf68c86c21796
Author: D Haley <mycae at gmx.com>
Date:   Thu May 14 23:55:44 2015 +0200

    Imported Upstream version 1.4.1
---
 AUTHORS                                            |    1 +
 CHANGELOG                                          |   38 +
 CMakeLists.txt                                     |  116 +-
 Doxyfile                                           |    5 +-
 TODO                                               |    7 -
 doc/common.dox                                     |   21 +
 doc/design.dox                                     |   35 +-
 doc/faq.dox                                        |   46 +-
 doc/images/layer_diagram.pdf                       |  Bin 106663 -> 29116 bytes
 doc/images/layer_diagram.png                       |  Bin 58493 -> 54489 bytes
 doc/images/layer_diagram.svg                       |  184 +--
 doc/images/layer_diagram_small.png                 |  Bin 30014 -> 49060 bytes
 doc/install.dox                                    |   48 +-
 doc/mainpage.dox                                   |   44 +-
 doc/tutorial.dox                                   |    1 +
 doc/tutorial_unordered_map.dox                     |   59 +
 examples/algo/copy_and_sort_file.cpp               |   27 +-
 examples/algo/phonebills.cpp                       |    3 +-
 examples/algo/phonebills_genlog.cpp                |    9 +-
 examples/algo/sort_file.cpp                        |   26 +-
 examples/applications/skew3.cpp                    |   15 +-
 examples/containers/CMakeLists.txt                 |    4 +-
 examples/containers/deque2.cpp                     |    2 +-
 examples/containers/unordered_map1.cpp             |   89 ++
 include/stxxl.h                                    |    1 +
 include/stxxl/bits/algo/adaptor.h                  |  126 +-
 include/stxxl/bits/algo/async_schedule.h           |   27 +-
 include/stxxl/bits/algo/inmemsort.h                |    9 +-
 include/stxxl/bits/algo/intksort.h                 |  107 +-
 include/stxxl/bits/algo/ksort.h                    |  332 ++--
 include/stxxl/bits/algo/losertree.h                |   34 +-
 include/stxxl/bits/algo/random_shuffle.h           |  168 +-
 include/stxxl/bits/algo/run_cursor.h               |   48 +-
 include/stxxl/bits/algo/scan.h                     |   69 +-
 include/stxxl/bits/algo/sort.h                     |  169 +-
 include/stxxl/bits/algo/sort_base.h                |    1 -
 include/stxxl/bits/algo/sort_helper.h              |   57 +-
 include/stxxl/bits/algo/stable_ksort.h             |  105 +-
 include/stxxl/bits/common/addressable_queues.h     |   44 +-
 include/stxxl/bits/common/aligned_alloc.h          |   46 +-
 include/stxxl/bits/common/binary_buffer.h          |  650 ++++++++
 include/stxxl/bits/common/cmdline.h                |   95 +-
 include/stxxl/bits/common/condition_variable.h     |    1 -
 include/stxxl/bits/common/counting_ptr.h           |    6 +-
 include/stxxl/bits/common/error_handling.h         |    1 -
 include/stxxl/bits/common/exceptions.h             |   41 +-
 include/stxxl/bits/common/exithandler.h            |   16 +-
 include/stxxl/bits/common/external_shared_ptr.h    |  119 ++
 include/stxxl/bits/common/is_sorted.h              |   41 +-
 include/stxxl/bits/common/log.h                    |    1 -
 include/stxxl/bits/common/mutex.h                  |    1 -
 include/stxxl/bits/common/new_alloc.h              |   72 +-
 include/stxxl/bits/common/onoff_switch.h           |    1 -
 include/stxxl/bits/common/rand.h                   |   10 +-
 include/stxxl/bits/common/seed.h                   |    1 -
 include/stxxl/bits/common/semaphore.h              |    1 -
 include/stxxl/bits/common/settings.h               |    7 +-
 include/stxxl/bits/common/simple_vector.h          |    7 +-
 include/stxxl/bits/common/state.h                  |    1 -
 include/stxxl/bits/common/timer.h                  |   30 +-
 include/stxxl/bits/common/tmeta.h                  |    2 -
 include/stxxl/bits/common/tuple.h                  |    5 -
 include/stxxl/bits/common/types.h                  |    3 -
 include/stxxl/bits/common/uint_types.h             |   32 +-
 include/stxxl/bits/common/utils.h                  |   33 +-
 include/stxxl/bits/compat/hash_map.h               |   43 +-
 include/stxxl/bits/compat/type_traits.h            |   50 +-
 include/stxxl/bits/compat/unique_ptr.h             |   28 +-
 include/stxxl/bits/config.h.in                     |    5 +
 include/stxxl/bits/containers/btree/btree.h        |  998 ++++++------
 include/stxxl/bits/containers/btree/iterator.h     |  194 +--
 include/stxxl/bits/containers/btree/iterator_map.h |   70 +-
 include/stxxl/bits/containers/btree/leaf.h         |  498 +++---
 include/stxxl/bits/containers/btree/node.h         |  525 ++++---
 include/stxxl/bits/containers/btree/node_cache.h   |  431 +++---
 include/stxxl/bits/containers/btree/root_node.h    |    1 -
 include/stxxl/bits/containers/deque.h              |    5 +-
 .../stxxl/bits/containers/hash_map/block_cache.h   |  613 ++++++++
 include/stxxl/bits/containers/hash_map/hash_map.h  | 1609 ++++++++++++++++++++
 include/stxxl/bits/containers/hash_map/iterator.h  |  587 +++++++
 .../stxxl/bits/containers/hash_map/iterator_map.h  |  279 ++++
 include/stxxl/bits/containers/hash_map/tuning.h    |   50 +
 include/stxxl/bits/containers/hash_map/util.h      |  577 +++++++
 include/stxxl/bits/containers/map.h                |   87 +-
 include/stxxl/bits/containers/matrix.h             |   26 +-
 include/stxxl/bits/containers/matrix_arithmetic.h  |    2 +-
 include/stxxl/bits/containers/pager.h              |    1 -
 include/stxxl/bits/containers/pq_ext_merger.h      |   55 +-
 include/stxxl/bits/containers/pq_helpers.h         |   68 +-
 include/stxxl/bits/containers/pq_losertree.h       |  123 +-
 include/stxxl/bits/containers/pq_mergers.h         |   22 +-
 include/stxxl/bits/containers/priority_queue.h     |  154 +-
 include/stxxl/bits/containers/queue.h              |   43 +-
 include/stxxl/bits/containers/sequence.h           |   39 +-
 include/stxxl/bits/containers/sorter.h             |    9 +-
 include/stxxl/bits/containers/stack.h              |  143 +-
 include/stxxl/bits/containers/unordered_map.h      |  498 ++++++
 include/stxxl/bits/containers/vector.h             |   88 +-
 include/stxxl/bits/defines.h                       |    2 +-
 include/stxxl/bits/io/boostfd_file.h               |   18 +-
 include/stxxl/bits/io/completion_handler.h         |   58 +-
 include/stxxl/bits/io/disk_queued_file.h           |   17 +-
 include/stxxl/bits/io/disk_queues.h                |   38 +-
 include/stxxl/bits/io/file.h                       |  131 +-
 include/stxxl/bits/io/fileperblock_file.h          |    7 +-
 include/stxxl/bits/io/io.h                         |    8 +-
 include/stxxl/bits/io/iostats.h                    |  100 +-
 include/stxxl/bits/io/linuxaio_file.h              |   82 +
 include/stxxl/bits/io/linuxaio_queue.h             |  100 ++
 include/stxxl/bits/io/linuxaio_request.h           |   74 +
 include/stxxl/bits/io/mem_file.h                   |   22 +-
 include/stxxl/bits/io/mmap_file.h                  |   16 +-
 include/stxxl/bits/io/request.h                    |   59 +-
 include/stxxl/bits/io/request_interface.h          |    9 +-
 include/stxxl/bits/io/request_operations.h         |   28 +-
 include/stxxl/bits/io/request_queue.h              |    4 +-
 include/stxxl/bits/io/request_queue_impl_1q.h      |   15 +-
 include/stxxl/bits/io/request_queue_impl_qwqr.h    |   20 +-
 include/stxxl/bits/io/request_queue_impl_worker.h  |    6 +-
 include/stxxl/bits/io/request_with_state.h         |   16 +-
 include/stxxl/bits/io/request_with_waiters.h       |   28 +-
 include/stxxl/bits/io/serving_request.h            |    8 +-
 include/stxxl/bits/io/simdisk_file.h               |   33 +-
 include/stxxl/bits/io/syscall_file.h               |   13 +-
 include/stxxl/bits/io/ufs_file_base.h              |    1 -
 include/stxxl/bits/io/wbtl_file.h                  |    4 +-
 include/stxxl/bits/io/wfs_file_base.h              |    1 -
 include/stxxl/bits/io/wincall_file.h               |   15 +-
 include/stxxl/bits/mng/adaptor.h                   |  387 ++---
 include/stxxl/bits/mng/bid.h                       |   31 +-
 include/stxxl/bits/mng/block_alloc.h               |   15 +-
 include/stxxl/bits/mng/block_alloc_interleaved.h   |    2 -
 include/stxxl/bits/mng/block_manager.h             |   13 +-
 include/stxxl/bits/mng/block_prefetcher.h          |   37 +-
 include/stxxl/bits/mng/block_scheduler.h           |    6 +-
 include/stxxl/bits/mng/buf_istream.h               |   36 +-
 include/stxxl/bits/mng/buf_istream_reverse.h       |   18 +-
 include/stxxl/bits/mng/buf_ostream.h               |   22 +-
 include/stxxl/bits/mng/buf_writer.h                |   16 +-
 include/stxxl/bits/mng/config.h                    |   27 +-
 include/stxxl/bits/mng/disk_allocator.h            |   35 +-
 include/stxxl/bits/mng/prefetch_pool.h             |    2 -
 include/stxxl/bits/mng/read_write_pool.h           |   12 +-
 include/stxxl/bits/mng/typed_block.h               |   35 +-
 include/stxxl/bits/mng/write_pool.h                |    9 +-
 include/stxxl/bits/parallel.h                      |    3 -
 include/stxxl/bits/singleton.h                     |    1 -
 include/stxxl/bits/stream/choose.h                 |   77 +-
 include/stxxl/bits/stream/sort_stream.h            |  430 +++---
 include/stxxl/bits/stream/sorted_runs.h            |    6 +-
 include/stxxl/bits/stream/stream.h                 |  800 +++++-----
 include/stxxl/bits/stream/unique.h                 |   10 +-
 include/stxxl/bits/unused.h                        |    1 -
 include/stxxl/bits/utils/malloc.h                  |    2 -
 include/stxxl/bits/verbose.h                       |   40 +-
 lib/common/rand.cpp => include/stxxl/unordered_map |   17 +-
 lib/CMakeLists.txt                                 |   13 +-
 lib/algo/async_schedule.cpp                        |   16 +-
 lib/common/cmdline.cpp                             |   13 +-
 lib/common/exithandler.cpp                         |    1 -
 lib/common/rand.cpp                                |    1 -
 lib/common/seed.cpp                                |    3 +-
 lib/common/verbose.cpp                             |    2 -
 lib/common/version.cpp                             |    1 -
 lib/io/boostfd_file.cpp                            |   46 +-
 lib/io/create_file.cpp                             |   83 +-
 lib/io/disk_queued_file.cpp                        |    2 -
 lib/io/fileperblock_file.cpp                       |   31 +-
 lib/io/iostats.cpp                                 |   72 +-
 lib/io/linuxaio_file.cpp                           |   66 +
 lib/io/linuxaio_queue.cpp                          |  284 ++++
 lib/io/linuxaio_request.cpp                        |  129 ++
 lib/io/mem_file.cpp                                |   39 +-
 lib/io/mmap_file.cpp                               |   10 +-
 lib/io/request.cpp                                 |   83 +-
 lib/io/request_queue_impl_1q.cpp                   |   55 +-
 lib/io/request_queue_impl_qwqr.cpp                 |  106 +-
 lib/io/request_queue_impl_worker.cpp               |    1 -
 lib/io/request_with_state.cpp                      |   44 +-
 lib/io/request_with_waiters.cpp                    |   23 +-
 lib/io/serving_request.cpp                         |   31 +-
 lib/io/simdisk_file.cpp                            |   17 +-
 lib/io/syscall_file.cpp                            |   35 +-
 lib/io/ufs_file_base.cpp                           |    5 +-
 lib/io/wbtl_file.cpp                               |   30 +-
 lib/io/wfs_file_base.cpp                           |    4 +-
 lib/io/wincall_file.cpp                            |    9 +-
 lib/mng/config.cpp                                 |   65 +-
 lib/mng/disk_allocator.cpp                         |    3 -
 local/test1.cpp                                    |   18 +-
 local/test2.cpp                                    |   68 +
 misc/analyze-source.pl                             |   68 +-
 misc/cmake/GetGitRevisionDescription.cmake         |  123 --
 misc/cmake/GetGitRevisionDescription.cmake.in      |   38 -
 misc/do-release.txt                                |   15 +-
 misc/uncrustify.cfg                                |    4 +-
 tests/algo/CMakeLists.txt                          |    1 +
 tests/algo/test_asch.cpp                           |    3 -
 tests/algo/test_bad_cmp.cpp                        |   46 +-
 tests/algo/test_ksort.cpp                          |   29 +-
 tests/algo/test_ksort_all_parameters.cpp           |    4 +-
 tests/algo/test_parallel_sort.cpp                  |   48 +-
 tests/algo/test_random_shuffle.cpp                 |    2 -
 tests/algo/test_scan.cpp                           |    4 -
 tests/algo/test_sort.cpp                           |   17 +-
 tests/algo/test_sort_all_parameters.cpp            |    4 +-
 tests/algo/test_sort_all_parameters.h              |   26 +-
 tests/algo/test_stable_ksort.cpp                   |   13 +-
 tests/algo/test_stable_ksort_all_parameters.cpp    |    4 +-
 tests/common/CMakeLists.txt                        |   10 +-
 tests/common/test_binary_buffer.cpp                |   90 ++
 tests/common/test_external_shared_ptr.cpp          |  291 ++++
 tests/containers/CMakeLists.txt                    |    3 +-
 tests/containers/btree/CMakeLists.txt              |   22 +-
 tests/containers/btree/test_btree.cpp              |    8 +-
 ...st_const_scan.cpp => test_btree_const_scan.cpp} |    4 +-
 ...nsert_erase.cpp => test_btree_insert_erase.cpp} |    6 +-
 ..._insert_find.cpp => test_btree_insert_find.cpp} |    7 +-
 ..._insert_scan.cpp => test_btree_insert_scan.cpp} |    5 +-
 tests/containers/hash_map/CMakeLists.txt           |   21 +
 tests/containers/hash_map/test_hash_map.cpp        |  317 ++++
 .../hash_map/test_hash_map_block_cache.cpp         |  155 ++
 .../hash_map/test_hash_map_iterators.cpp           |  390 +++++
 .../hash_map/test_hash_map_reader_writer.cpp       |  176 +++
 tests/containers/test_ext_merger.cpp               |    1 -
 tests/containers/test_ext_merger2.cpp              |    7 +-
 tests/containers/test_iterators.cpp                |    3 -
 tests/containers/test_many_stacks.cpp              |    1 -
 tests/containers/test_map.cpp                      |    8 +-
 tests/containers/test_map_random.cpp               |    4 -
 tests/containers/test_migr_stack.cpp               |   11 +-
 tests/containers/test_sorter.cpp                   |   15 +-
 tests/containers/test_stack.cpp                    |   57 +-
 tests/containers/test_vector.cpp                   |    2 -
 tests/containers/test_vector_buf.cpp               |   18 +-
 tests/containers/test_vector_export.cpp            |    1 -
 tests/containers/test_vector_sizes.cpp             |    1 -
 tests/io/CMakeLists.txt                            |   10 +-
 tests/io/test_cancel.cpp                           |    7 +-
 tests/io/test_io.cpp                               |    1 -
 tests/io/test_io_sizes.cpp                         |   15 +-
 tests/io/test_mmap.cpp                             |    4 +-
 tests/io/test_sim_disk.cpp                         |   11 +-
 tests/mng/test_block_manager.cpp                   |    2 -
 tests/mng/test_block_manager1.cpp                  |    1 -
 tests/mng/test_block_manager2.cpp                  |    4 +-
 tests/mng/test_bmlayer.cpp                         |    2 -
 tests/mng/test_buf_streams.cpp                     |    1 -
 tests/stream/test_loop.cpp                         |    5 +-
 tests/stream/test_naive_transpose.cpp              |    5 -
 tests/stream/test_sorted_runs.cpp                  |    1 -
 tests/stream/test_stream.cpp                       |   14 -
 tests/stream/test_stream1.cpp                      |    3 +-
 tools/benchmark_disks.cpp                          |  102 +-
 tools/benchmark_disks_random.cpp                   |    8 +-
 tools/benchmark_files.cpp                          |   89 +-
 tools/benchmark_pqueue.cpp                         |   26 +-
 tools/benchmark_sort.cpp                           |   17 +-
 tools/benchmarks/CMakeLists.txt                    |    8 +-
 tools/benchmarks/benchmark_naive_matrix.cpp        |    1 -
 tools/benchmarks/berkeley_db_benchmark.cpp         |   54 +-
 tools/benchmarks/matrix_benchmark.cpp              |    4 +-
 tools/benchmarks/monotonic_pq.cpp                  |   19 +-
 tools/benchmarks/pq_benchmark.cpp                  |    9 -
 tools/benchmarks/stack_benchmark.cpp               |    6 -
 tools/benchmarks/tpie_stack_benchmark.cpp          |    8 +-
 tools/create_files.cpp                             |   31 +-
 tools/extras/CMakeLists.txt                        |    6 +-
 tools/extras/benchmark_disk_and_flash.cpp          |   10 +-
 tools/extras/iobench_scatter_in_place.cpp          |   18 +-
 tools/mallinfo.cpp                                 |    2 +-
 tools/mlock.cpp                                    |    2 +-
 tools/stxxl_tool.cpp                               |    8 +-
 273 files changed, 13025 insertions(+), 5225 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 0b6788a..1c88330 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,5 +1,6 @@
 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
 Daniel Feist <daniel.feist at student.kit.edu>
+Daniel Godas-Lopez <dgodas at gmail.com>
 Ilja Andronov <sni4ok at yandex.ru>
 Jaroslaw Fedorowicz <fedorow at cs.uni-frankfurt.de>
 Jens Mehnert <jmehnert at mpi-sb.mpg.de>
diff --git a/CHANGELOG b/CHANGELOG
index bef8150..28d387d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,41 @@
+Version 1.4.1 (29 October 2014)
+
+* support kernel based asynchronous I/O on Linux (new file type "linuxaio"),
+  which exploits Native Command Queuing (NCQ) if available.
+  disable/enable with the define STXXL_FILE_LINUXAIO 0/1 via cmake
+
+* adding new disk_config entry device_id, which specifies the physical device
+  id of the "disk" used during prefetching sequence calculations. This used to
+  be identical with the queue id, however, for linuxaio there is only one
+  queue; thus the distinction had to be made. In a default config, no changes
+  are necessary, as the device_id parameter is automatically enumerated.
+
+* adding stxxl::binary_buffer which can be used for compact serialization and
+  reading via a stxxl::binary_reader cursor interface.
+
+* stxxl::unordered_map is a hash map, backed by external memory. It probably
+  only works well when lots of internal memory is used to buffer access to a
+  relatively small working set. Then, however, fast direct in-memory item
+  access can be used.
+
+* stxxl::external_shared_ptr is a proxy class to allow use of shared_ptr
+  classes inside stxxl containers. Reference counts are kept in memory, while
+  data may be swapped out to disk.
+
+* removing struct default_completion_handler, using a NULL pointer in default
+  complete handler instead, since otherwise a default_completion_handler
+  object is cloned for _each_io_request_! Using a NULL pointer avoids
+  superfluous new/delete work on the heap.
+
+* minor changes:
+  - disable TPIE benchmarks by default, removing a warning.
+  - compilation and tests work under MinGW 64-bit with special threads.
+  - fixed compilation on 32-bit systems, everything is -Wconversion safe.
+  - adding 32-bit and 64-bit cmdline_parser::add_bytes() variants.
+  - removing all double underscores.
+  - use atomic_counted_object in class file for request reference counting.
+  - adding local/test2.cpp containing a stxxl::sorter example.
+
 Version 1.4.0 (12 December 2013)
 
 * Reorganized Directory Hierarchy
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c49420..f7301a4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@
 #
 #  Part of the STXXL. See http://stxxl.sourceforge.net
 #
-#  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+#  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
 #
 #  Distributed under the Boost Software License, Version 1.0.
 #  (See accompanying file LICENSE_1_0.txt or copy at
@@ -25,7 +25,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/misc/cmake)
 
 # prohibit in-source builds
 if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
-  message(SEND_ERROR "In-source builds are not allowed.")
+  message(FATAL_ERROR "In-source builds are not allowed, use a separate build directory.")
 endif()
 
 # default to Debug building for single-config generators
@@ -37,15 +37,26 @@ endif()
 # STXXL version string
 set(STXXL_VERSION_MAJOR "1")
 set(STXXL_VERSION_MINOR "4")
-set(STXXL_VERSION_PATCH "0")
+set(STXXL_VERSION_PATCH "1")
 set(STXXL_VERSION_STRING "${STXXL_VERSION_MAJOR}.${STXXL_VERSION_MINOR}.${STXXL_VERSION_PATCH}")
 set(STXXL_VERSION_PHASE "prerelease/${CMAKE_BUILD_TYPE}")
 
-# read .git directory (if it exists) and find git sha
-include(GetGitRevisionDescription)
-get_git_head_revision(STXXL_VERSION_GIT_REFSPEC STXXL_VERSION_GIT_SHA1)
-if(STXXL_VERSION_GIT_REFSPEC)
-  message(STATUS "Detected git refspec ${STXXL_VERSION_GIT_REFSPEC} sha ${STXXL_VERSION_GIT_SHA1}")
+# read git directory (if it exists) and find git sha
+if(EXISTS ${PROJECT_SOURCE_DIR}/.git)
+  find_package(Git)
+  if(GIT_FOUND)
+    execute_process(COMMAND ${GIT_EXECUTABLE} describe HEAD
+      WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+      OUTPUT_VARIABLE "STXXL_VERSION_GIT_REFSPEC"
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+    execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
+      WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+      OUTPUT_VARIABLE "STXXL_VERSION_GIT_SHA1"
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+    message(STATUS "Detected git refspec ${STXXL_VERSION_GIT_REFSPEC} sha ${STXXL_VERSION_GIT_SHA1}")
+  endif()
 endif()
 
 ###############################################################################
@@ -85,6 +96,9 @@ option(USE_VALGRIND "Run tests with valgrind, pre-initialize some memory buffers
 
 option(USE_GCOV "Compile and run tests with gcov for coverage analysis." OFF)
 
+# see tools/benchmarks about older TPIE benchmarks.
+option(USE_TPIE "Try to compile extra benchmarks from the 2007 S&PE paper with an old TPIE version." OFF)
+
 ### building shared and/or static libraries
 
 # by default we currently only build a static library, since we do not aim to
@@ -215,6 +229,18 @@ elseif(NOT NO_CXX11)
       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
     endif()
   endif()
+  # on MacOSX with clang we need to use libc++ for C++11 headers
+  if(APPLE)
+    if (CMAKE_CXX_COMPILER MATCHES ".*clang[+][+]"
+        OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+      check_cxx_compiler_flag(-stdlib=libc++ CXX_HAS_STDLIB_LIBCXX)
+      if(CXX_HAS_STDLIB_LIBCXX)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+      else()
+        message(SEND_ERROR "Compilation on MacOSX with clang requires libc++.")
+      endif()
+    endif()
+  endif(APPLE)
 endif()
 
 # check C++ compiler for C++11 features
@@ -222,7 +248,7 @@ endif()
 include(CheckCXXSourceCompiles)
 check_cxx_source_compiles(
   "#include <vector>
-   int main() { std::vector<int> v(42); for (auto i : v) { ++i; } return 0; }"
+  int main() { std::vector<int> v(42); for (auto i : v) { ++i; } return 0; }"
   STXXL_HAVE_CXX11_RANGE_FOR_LOOP)
 
 ###############################################################################
@@ -242,6 +268,11 @@ if(USE_GCOV)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov")
 
+  # add cached variable containing parameters for lcov/genhtml
+  set(LCOV_FLAGS "" CACHE STRING "parameters for lcov")
+  set(GENHTML_FLAGS --legend --no-branch-coverage
+    CACHE STRING "parameters for genhtml")
+
   # custom target to run before tests
   add_custom_target(lcov-reset
     COMMAND ${LCOV} -q --directory ${CMAKE_BINARY_DIR} --zerocounters
@@ -262,8 +293,8 @@ if(USE_GCOV)
   # command sequence to gather, clean and generate HTML coverage report
   add_custom_target(lcov-html
     COMMAND ${LCOV} -q --directory . --capture --output-file lcov.info
-    COMMAND ${LCOV} -q --remove lcov.info 'tests/*' 'examples/*' '/usr/*' --output-file lcov-clean.info
-    COMMAND ${GENHTML} -q -o coverage --title "STXXL ${GITDESC}" --prefix ${PROJECT_SOURCE_DIR} --legend lcov-clean.info
+    COMMAND ${LCOV} -q --remove lcov.info '/usr/*' ${LCOV_FLAGS} --output-file lcov-clean.info
+    COMMAND ${GENHTML} -q -o coverage --title "STXXL ${GITDESC}" --prefix ${PROJECT_SOURCE_DIR} ${GENHTML_FLAGS} lcov-clean.info
     COMMENT "Capturing code coverage counters and create HTML coverage report"
     WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 
@@ -280,13 +311,30 @@ endif(USE_GCOV)
 # for testing for c++ system include files
 include(CheckIncludeFileCXX)
 
+check_include_file_cxx(pthread.h HAVE_PTHREAD_H)
+
+if(MINGW AND NOT HAVE_PTHREAD_H)
+  set(USE_STD_THREADS ON)
+endif()
+
 if(MSVC OR USE_STD_THREADS)
 
-  check_include_file_cxx(thread HAVE_STD_THREAD_H)
-  check_include_file_cxx(mutex HAVE_STD_MUTEX_H)
+  # check for std::mutex and std::thread availability
+  check_cxx_source_compiles(
+    "#include <mutex>
+    int main() { std::mutex mutex; mutex.lock(); return 0; }"
+    HAVE_STD_MUTEX)
 
-  if(HAVE_STD_THREAD_H AND HAVE_STD_MUTEX_H)
+  check_cxx_source_compiles(
+    "#include <thread>
+    int main() { std::thread t; return 0; }"
+    HAVE_STD_THREAD)
+
+  if(HAVE_STD_THREAD AND HAVE_STD_MUTEX)
     set(STXXL_STD_THREADS "1")
+  else()
+    set(USE_BOOST ON)
+    message("No std::thread and std::mutex found, trying to use Boost classes instead")
   endif()
 
   # using <thread> also requires -pthread on gcc
@@ -310,11 +358,11 @@ endif()
 include(TestLargeFiles)
 test_large_files(HAVE_LARGEFILES)
 
-if (HAVE_LARGEFILES)
+if(HAVE_LARGEFILES)
   add_definitions(-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES)
-else (NOT HAVE_LARGEFILES)
+else()
   message(FATAL_ERROR "Large file support was not detectable.")
-endif (HAVE_LARGEFILES)
+endif()
 
 ###############################################################################
 # check for O_DIRECT flag
@@ -322,7 +370,7 @@ endif (HAVE_LARGEFILES)
 if(CYGWIN)
   #-tb O_DIRECT messes up cygwin
   set(STXXL_DIRECT_IO_OFF 1)
-elseif(MSVC)
+elseif(MSVC OR MINGW)
   # have FILE_FLAG_NO_BUFFERING on Windows
   set(STXXL_DIRECT_IO_OFF 0)
 elseif(APPLE)
@@ -334,10 +382,10 @@ else()
   check_cxx_source_compiles("
 #include <unistd.h>
 #include <fcntl.h>
-int main(int argc, char * argv[]) { argc = O_DIRECT; }
+int main() { return ((int)O_DIRECT) != 0; }
 " STXXL_HAVE_O_DIRECT)
 
-  if (STXXL_HAVE_O_DIRECT)
+  if(STXXL_HAVE_O_DIRECT)
     set(STXXL_DIRECT_IO_OFF 0)
   else()
     set(STXXL_DIRECT_IO_OFF 1)
@@ -358,6 +406,21 @@ include(CheckSymbolExists)
 check_symbol_exists(mmap "sys/mman.h" STXXL_HAVE_MMAP_FILE)
 
 ###############################################################################
+# check for Linux aio syscalls
+
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles(
+  "#include <unistd.h>
+   #include <sys/syscall.h>
+   #include <linux/aio_abi.h>
+   int main() {
+       aio_context_t context;
+       long r = syscall(SYS_io_setup, 5, &context);
+       return (r == 0) ? 0 : -1;
+   }"
+   STXXL_HAVE_LINUXAIO_FILE)
+
+###############################################################################
 # check for an atomic add-and-fetch intrinsic for counting_ptr
 
 include(CheckCXXSourceCompiles)
@@ -374,7 +437,7 @@ if(USE_BOOST)
 
   if(WIN32)
     set(Boost_USE_STATIC_LIBS ON)
-  endif(WIN32)
+  endif()
 
   # first try to find the version
   find_package(Boost 1.34.1 REQUIRED)
@@ -421,7 +484,7 @@ if(USE_GNU_PARALLEL)
   else()
     check_include_file_cxx(parallel/algorithm HAVE_PARALLEL_ALGORITHM_H)
 
-    if (NOT HAVE_PARALLEL_ALGORITHM_H)
+    if(NOT HAVE_PARALLEL_ALGORITHM_H)
       message(FATAL_ERROR "GNU parallel mode header not found. Try compilation without parallel mode.")
     else()
       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
@@ -564,7 +627,7 @@ macro(add_define PROGNAME)
     endif(BUILD_TESTS)
   endif()
 
-endmacro (add_define TESTNAME)
+endmacro(add_define TESTNAME)
 
 ###############################################################################
 # cmake script TRY_COMPILE all stxxl header files
@@ -580,11 +643,14 @@ if(TRY_COMPILE_HEADERS)
   list(SORT header_files)
 
   foreach(file ${header_files})
+    string(REPLACE "/" "_" compilename "${file}") # replace / to _ to fix warnings
+    string(REPLACE "." "_" compilename "${compilename}")
+
     check_cxx_source_compiles(
       "#include \"${file}\"
-      int main() { return 0; }" IsSelfContained-${file})
+      int main() { return 0; }" IsSelfContained${compilename})
 
-    if(NOT IsSelfContained-${file})
+    if(NOT IsSelfContained${compilename})
       message(SEND_ERROR "Compilation FAILED for ${file}\n\nCompiler output:\n${OUTPUT}")
     endif()
   endforeach()
diff --git a/Doxyfile b/Doxyfile
index a6f596e..6206722 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -32,7 +32,7 @@ PROJECT_NAME           = STXXL
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = 1.4.0
+PROJECT_NUMBER         = 1.4.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer
@@ -1576,6 +1576,7 @@ PREDEFINED             = "STXXL_BEGIN_NAMESPACE=namespace stxxl {" \
                          "STXXL_END_NAMESPACE=}" \
                          "STXXL_DEPRECATED(x)=x" \
                          "STXXL_HAVE_MMAP_FILE=1" \
+                         "STXXL_HAVE_LINUXAIO_FILE=1" \
                          "STXXL_WINDOWS=1" \
                          "STXXL_POSIX_THREADS=1" \
 
@@ -1784,7 +1785,7 @@ DIRECTORY_GRAPH        = NO
 # HTML_FILE_EXTENSION to xhtml in order to make the SVG files
 # visible in IE 9+ (other browsers do not have this requirement).
 
-DOT_IMAGE_FORMAT       = png
+DOT_IMAGE_FORMAT       = svg
 
 # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
 # enable generation of interactive SVG images that allow zooming and panning.
diff --git a/TODO b/TODO
index e644188..4bb1e13 100644
--- a/TODO
+++ b/TODO
@@ -6,8 +6,6 @@
   max(size_at_program_start, configured_size)
   https://sourceforge.net/forum/message.php?msg_id=4925158
 
-* integrate unordered_map branch
-
 * allocation strategies: provide a method get_num_disks()
   and don't use stxxl::config::get_instance()->disks_number() inappropriately
 
@@ -30,11 +28,6 @@
   when distributing blocks to disks, or does load-balancing depending
   on the given speed of the disks
 
-* implement new disk queuing strategy that supports NCQ,
-  which is now widely available in HDDs/SSDs;
-  probably most interesting for rather small block sizes
-  (currently begin developed in branch kernelaio)
-
 * abstract away block manager so every container can attach to a file.
 
 * retry incomplete I/Os for all file types (currently only syscall)
diff --git a/doc/common.dox b/doc/common.dox
index 3f8a26a..2ebf0ab 100644
--- a/doc/common.dox
+++ b/doc/common.dox
@@ -26,6 +26,7 @@ A lots of basic utility classes and helper functions have accumulated in STXXL.
 - \subpage common_simple_vector "a non-growing, non-initializing simple_vector"
 - \subpage common_counting_ptr "reference counted (shared) objects via counting_ptr"
 - \subpage common_cmdline "command line parser"
+- \subpage common_binary_buffer "serialization of variable data structures into blobs"
 - \subpage common_thread_sync "synchronization primitives for multi-threading"
 - \subpage common_logging "logging macros"
 - \subpage common_assert "macros for checking assertions"
@@ -208,6 +209,26 @@ This example is documented in \ref common_cmdline tutorial.
 
 ////////////////////////////////////////////////////////////////////////////////
 
+/** \page common_binary_buffer Serializing Variable Data Structures with binary_buffer
+
+Some applications of STXXL will require variable data structures. Currently there is not much support for this in STXXL.
+
+For serializing information into in-memory data blocks, the STXXL provides the helper classes \ref binary_buffer and \ref binary_reader. These provide functions \ref binary_buffer::put<>() to append arbitrary integral data types and \ref binary_reader::get<>() to read these again. Serialization and deserialization of variable data structures are then composed of identical sequences of put()/get().
+
+Additionally, the classes already provide methods to serialize variable length strings (together with their lengths), and thereby also sub-block serialization. These functions are called \ref binary_buffer::put_string() and \ref binary_reader::get_string().
+
+Furthermore, to squeeze small integers into fewer bytes, the classes also contain "varint" encoding, where each byte contains 7 data bits and one continuation bit. These functions are called \ref binary_buffer::put_varint() and \ref binary_reader::get_varint().
+
+The following example fills a binary_buffer with some data elements:
+\snippet tests/common/test_binary_buffer.cpp serialize
+
+And the following binary_reader example deserializes the data elements and checks their content.
+\snippet tests/common/test_binary_buffer.cpp deserialize
+
+*/
+
+////////////////////////////////////////////////////////////////////////////////
+
 /** \page common_thread_sync Synchronization Primitives for Multi-Threading
 
 To support multi-threading, some parts of STXXL use synchronization primitives to ensure correct results. The primitives are based either on pthreads or on Boost classes.
diff --git a/doc/design.dox b/doc/design.dox
index 444905a..f406320 100644
--- a/doc/design.dox
+++ b/doc/design.dox
@@ -93,7 +93,7 @@ The external memory manager (object \ref stxxl::block_manager) is responsible fo
 
 On allocation requests, the \ref stxxl::block_manager returns \ref stxxl::BID objects -- Block IDentifiers. An object of the type \ref stxxl::BID describes the physical location of an allocated block, including the disk and offset of a region of storage on disk. One can load or store the data that resides at the location given by the \ref stxxl::BID using asynchronous \c read and \c write methods of a \ref stxxl::typed_block object.
 
-The full signature of the STXXL "block of elements" class is \ref stxxl::typed_block<RawSize,T,NRef,InfoType>. The C++ template parameter RawSize defines the total size of the block in bytes. Since block size is not a single global constant in the STXXL namespace, a programmer can simultaneously operate with several block types having different blocks sizes. Such flexibility is often required for good performance. For example, B+-tree leaves might have a size different from the size of t [...]
+The full signature of the STXXL "block of elements" class is \ref stxxl::typed_block  The C++ template parameter RawSize defines the total size of the block in bytes. Since block size is not a single global constant in the STXXL namespace, a programmer can simultaneously operate with several block types having different blocks sizes. Such flexibility is often required for good performance. For example, B+-tree leaves might have a size different from the size of the internal nodes. We hav [...]
 
 In the following listing, we give an example of how to program block I/O using objects of the BM layer. In line 2 we define the type of block: its size is one megabyte and the type of elements is \c double. The pointer to the only instance of the singleton object \ref stxxl::block_manager is obtained in line 5. Line 6 asks the block manager to allocate 32 blocks in external memory. The <tt>new_blocks</tt> call writes the allocated BIDs to the output iterator, given by the last parameter. [...]
 
@@ -162,6 +162,7 @@ STXXL library was designed to ease the access to external memory algorithms and
 - \subpage design_queue "stxxl::queue"
 - \subpage design_deque "stxxl::deque"
 - \subpage design_map "stxxl::map"
+- \subpage design_unordered_map "stxxl::unordered_map"
 
 Beyond these, STXXL also provides a set of containers that are not part of the STL:
 
@@ -503,6 +504,14 @@ Our design allows to use different implementations for leaves and (internal) nod
 
 */
 
+/** \page design_unordered_map Unordered Map
+
+There is currently no documentation here, see the tutorial \ref tutorial_unordered_map for some notes.
+
+The unordered_map/hash map library was created as a student project at the University of Karlsruhe. If you can read German, contact the maintainers for a copy of the student thesis.
+
+*/
+
 /** \page design_matrix Matrix
 
 Currently no documentation here.
@@ -842,11 +851,11 @@ Example:
 struct MyType
 {
     typedef unsigned long long key_type;
-    key_type _key;
-    char _data[32];
+    key_type m_key;
+    char m_data[32];
     MyType() {}
-    MyType(key_type __key):_key(__key) {}
-    key_type key() { return _key; }
+    MyType(key_type k) : m_key(k) {}
+    key_type key() { return m_key; }
     MyType min_value() const
     { return MyType( std::numeric_limits<key_type>::min() ); }
     MyType max_value() const
@@ -872,16 +881,16 @@ A key extractor object for ordering elements having 64 bit integer keys:
 struct MyType
 {
     typedef unsigned long long key_type;
-    key_type _key;
-    char _data[32];
+    key_type m_key;
+    char m_data[32];
     MyType() {}
-    MyType(key_type __key):_key(__key) {}
+    MyType(key_type k) : m_key(k) {}
 };
 struct GetKey
 {
     typedef MyType::key_type key_type;
     key_type operator() (const MyType & obj)
-    { return obj._key; }
+    { return obj.m_key; }
     MyType min_value() const
     { return MyType( std::numeric_limits<key_type>::min() ); }
     MyType max_value() const
@@ -931,11 +940,11 @@ The same as for \ref design_algo_sort "stxxl::sort".
 struct MyType
 {
     typedef unsigned long long key_type;
-    key_type _key;
-    char _data[32];
+    key_type m_key;
+    char m_data[32];
     MyType() {}
-    MyType(key_type __key):_key(__key) {}
-    key_type key() { return obj._key; }
+    MyType(key_type k) : m_key(k) {}
+    key_type key() { return obj.m_key; }
     static MyType min_value() const
     { return MyType( std::numeric_limits<key_type>::min() ); }
     static MyType max_value() const
diff --git a/doc/faq.dox b/doc/faq.dox
index b774b2d..603e6a3 100644
--- a/doc/faq.dox
+++ b/doc/faq.dox
@@ -28,24 +28,27 @@ Please note that from STXXL 1.4.0 on, only 64-bit systems are fully supported. C
 
 The compilers marked with '*' are the maintainers' favorite choices and are most thoroughly tested.
 
-compiler              | supported options
---------------------- | --------------------------
-gcc 4.8.2             | stxxl parallel (boost) (c++0x)
-gcc 4.7.3 *           | stxxl parallel (boost) (c++0x)
-gcc 4.6.4             | stxxl parallel (boost) (c++0x)
-gcc 4.5.4             | stxxl parallel (boost) (c++0x)
-gcc 4.4.7             | stxxl parallel (boost) (c++0x)
-gcc 4.3.6             | stxxl (boost)
-gcc 4.1.2             | stxxl (boost)
-gcc 3.4.6             | stxxl (boost)
-gcc 3.3               | unsupported
-icpc 2013.5.192 *     | stxxl (boost) (c++0x)
-icpc 2011.13.367      | stxxl (boost) (c++0x)
-clang++ 3.1, 3.2, 3.3 | stxxl (boost) (c++0x)
-cygwin gcc 4.8.2      | stxxl parallel (boost) (c++0x)
-msvc 2013 12.0 *      | stxxl (boost) (c++11)
-msvc 2012 11.0        | stxxl (boost) (c++0x)
-msvc 2010 10.0        | stxxl boost required
+compiler                | supported options
+----------------------- | --------------------------
+gcc 4.9.1               | stxxl parallel (boost) (c++11)
+gcc 4.8.3 *             | stxxl parallel (boost) (c++11)
+gcc 4.7.3               | stxxl parallel (boost) (c++0x)
+gcc 4.6.4               | stxxl parallel (boost) (c++0x)
+gcc 4.5.4               | stxxl parallel (boost) (c++0x)
+gcc 4.4.7               | stxxl parallel (boost) (c++0x)
+gcc 4.3.6               | stxxl (boost)
+gcc 4.1.2               | stxxl (boost)
+gcc 3.4.6               | stxxl (boost)
+gcc 3.3                 | unsupported
+icpc 2015.0.090 *       | stxxl (boost) (c++0x)
+icpc 2013.5.192 *       | stxxl (boost) (c++0x)
+icpc 2011.13.367        | stxxl (boost) (c++0x)
+clang++ 3.2, 3.3, 3.4.2 | stxxl (boost) (c++0x)
+mingw-w64 gcc 4.8.3     | stxxl parallel (boost) (c++11)
+cygwin gcc 4.8.3        | stxxl parallel (boost) (c++11)
+msvc 2013 12.0 *        | stxxl (boost) (c++11)
+msvc 2012 11.0          | stxxl (boost) (c++0x)
+msvc 2010 10.0          | stxxl boost required
 
 - The option "parallel" uses the __gnu_parallel extensions in some parts of STXXL. For all \c gcc versions >= 4.4 the __gnu_parallel extensions are ON by default. Support for MCSTL (predecessor of __gnu_parallel) was removed in STXXL 1.4.0.
 
@@ -104,11 +107,12 @@ This is a design choice, having the data structures thread-safe would mean a sig
 
 \section faq_diskalloc Disk Allocation on Multiple Disks
 
-I have configured several disks to use with STXXL. Why does STXXL fail complaining about the lack of space? According to my calclulations, the space on the disks should be sufficient.
+Q: I have configured several disks to use with STXXL. Why does STXXL fail complaining about the <b>lack of space</b>? According to my calculations, the space on the disks should be sufficient.
 
-This may happen if the disks have different size. With the default parameters \c STXXL containers use randomized block-to-disk allocation strategies
-that distribute data evenly between the disks but ignore the availability of free space on them. 
+A: This may happen if the disks have <b>different size</b>. With the default parameters \c STXXL containers use randomized block-to-disk allocation strategies
+that distribute data evenly between the disks but ignore the availability of free space on them. Thus when the smallest disk is full, the program will abort because it cannot grow the file on that disk.
 
+A2: This round-robin disk allocation is due to the history of STXXL's support for parallel disk algorithms. It would be great if someone would contribute a patch for this issue. This would require adapting stxxl::disk_allocator and stxxl::block_manager to skip full disks when allocating new blocks.
 
 \section faq_msclr STXXL in a Microsoft CLR Library
 
diff --git a/doc/images/layer_diagram.pdf b/doc/images/layer_diagram.pdf
index ce78dff..2a2e7b3 100644
Binary files a/doc/images/layer_diagram.pdf and b/doc/images/layer_diagram.pdf differ
diff --git a/doc/images/layer_diagram.png b/doc/images/layer_diagram.png
index fbb2110..463f6a3 100644
Binary files a/doc/images/layer_diagram.png and b/doc/images/layer_diagram.png differ
diff --git a/doc/images/layer_diagram.svg b/doc/images/layer_diagram.svg
index 689aa80..a067784 100644
--- a/doc/images/layer_diagram.svg
+++ b/doc/images/layer_diagram.svg
@@ -18,9 +18,9 @@
    viewBox="1441 1382 8577.2987 6161.3924"
    id="svg2837"
    version="1.1"
-   inkscape:version="0.48.0 r9654"
+   inkscape:version="0.48.4 r9939"
    sodipodi:docname="layer_diagram.svg"
-   inkscape:export-filename="/home/singler/code/stxxl/trunk/images/layer_diagram.png"
+   inkscape:export-filename="layer_diagram.png"
    inkscape:export-xdpi="82.480003"
    inkscape:export-ydpi="82.480003">
   <metadata
@@ -264,15 +264,15 @@
      guidetolerance="10"
      inkscape:pageopacity="0"
      inkscape:pageshadow="2"
-     inkscape:window-width="1272"
-     inkscape:window-height="936"
+     inkscape:window-width="1918"
+     inkscape:window-height="1180"
      id="namedview3055"
      showgrid="false"
-     inkscape:zoom="0.82731493"
-     inkscape:cx="321.87004"
-     inkscape:cy="226.51891"
-     inkscape:window-x="0"
-     inkscape:window-y="0"
+     inkscape:zoom="1.6546299"
+     inkscape:cx="272.96192"
+     inkscape:cy="232.69444"
+     inkscape:window-x="1920"
+     inkscape:window-y="18"
      inkscape:window-maximized="1"
      inkscape:current-layer="svg2837"
      units="mm"
@@ -280,26 +280,30 @@
      fit-margin-left="3"
      fit-margin-right="3"
      fit-margin-bottom="3" />
-  <text
-     xml:space="preserve"
-     x="-4696.7891"
-     y="2153.5166"
-     font-style="normal"
-     font-weight="bold"
-     font-size="441"
-     id="text2993"
-     style="font-size:441px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
-     transform="matrix(-3.6732199e-6,-1,1,-3.6732199e-6,0,0)">TXXL</text>
-  <text
-     xml:space="preserve"
-     x="-5454.3784"
-     y="2173.2498"
-     font-style="normal"
-     font-weight="bold"
-     font-size="567"
-     id="text2997"
-     style="font-size:567px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
-     transform="matrix(-1.101966e-5,-1,1,-1.101966e-5,0,0)">S</text>
+  <g
+     id="g3088"
+     transform="translate(0,-391.09186)">
+    <text
+       transform="matrix(-3.6732199e-6,-1,1,-3.6732199e-6,0,0)"
+       style="font-size:441px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
+       id="text2993"
+       font-size="441"
+       font-weight="bold"
+       font-style="normal"
+       y="2153.5166"
+       x="-4696.7891"
+       xml:space="preserve">TXXL</text>
+    <text
+       transform="matrix(-1.101966e-5,-1,1,-1.101966e-5,0,0)"
+       style="font-size:567px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
+       id="text2997"
+       font-size="567"
+       font-weight="bold"
+       font-style="normal"
+       y="2173.2498"
+       x="-5454.3784"
+       xml:space="preserve">S</text>
+  </g>
   <rect
      id="rect2999"
      style="color:#000000;fill:#9eb0d5;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:32.00000043;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
@@ -383,26 +387,26 @@
      points="6803,2881 9307,2881 9826,2881 "
      transform="translate(-66.465922,67.780745)" />
   <text
-     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:215px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-anchor:middle;fill:#000000;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
      id="text3021"
      font-size="215"
      font-weight="normal"
      font-style="normal"
      y="6368.4375"
-     x="6074.5342"
+     x="6076.0845"
      xml:space="preserve"><tspan
-       style="font-size:265.33950806px"
+       style="font-size:265.33950806px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
        id="tspan3245">files, I/O requests, disk queues, completion handlers</tspan></text>
   <text
-     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:215px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-anchor:middle;fill:#000000;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
      id="text3023"
      font-size="215"
      font-weight="normal"
      font-style="normal"
-     y="5131.1084"
-     x="6121.5342"
+     y="5136.7778"
+     x="6067.3408"
      xml:space="preserve"><tspan
-       style="font-size:265.33950806px"
+       style="font-size:265.33950806px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
        id="tspan3243">block prefetcher, buffered block writer</tspan></text>
   <text
      style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
@@ -414,157 +418,159 @@
      x="6074.5342"
      xml:space="preserve" />
   <text
-     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
+     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
      id="text3027"
      font-size="265"
      font-weight="bold"
      font-style="normal"
-     y="5849.4375"
-     x="6074.5342"
+     y="5860.7769"
+     x="6082.5166"
      xml:space="preserve"><tspan
-       style="font-size:371.47531128px"
+       style="font-size:371.47531128px;stroke:none"
        id="tspan3223">Asynchronous I/O Primitives</tspan></text>
   <text
-     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
+     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
      id="text3029"
      font-size="265"
      font-weight="bold"
      font-style="normal"
      y="4328.1084"
-     x="6121.5342"
+     x="6061.8369"
      xml:space="preserve"><tspan
-       style="font-size:371.47531128px"
+       style="font-size:371.47531128px;stroke:none"
        id="tspan3219">Block Management</tspan></text>
   <text
-     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:215px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-anchor:middle;fill:#000000;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
      id="text3031"
      font-size="215"
      font-weight="normal"
      font-style="normal"
-     y="4801.1084"
-     x="6121.5342"
+     y="4840.7959"
+     x="6076.1714"
      xml:space="preserve"><tspan
-       style="font-size:265.33950806px"
+       style="font-size:265.33950806px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
        id="tspan3241">typed block, block manager, buffered streams,</tspan></text>
   <text
-     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:none;font-family:AvantGarde"
      id="text3033"
      font-size="215"
      font-weight="normal"
      font-style="normal"
      y="3279.7798"
-     x="2796.8733"
+     x="2545.4448"
      xml:space="preserve"><tspan
-       style="font-size:265.33950806px"
+       style="font-size:265.33950806px;stroke:none"
        id="tspan3227">Containers:</tspan></text>
   <text
-     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
+     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
      id="text3035"
      font-size="265"
      font-weight="bold"
      font-style="normal"
-     y="2759.7798"
-     x="4468.5342"
+     y="2787.843"
+     x="4462.5898"
      xml:space="preserve"><tspan
-       style="font-size:371.47531128px"
+       style="font-size:371.47531128px;stroke:none"
        id="tspan3215">STL Interface</tspan></text>
   <text
-     style="font-size:164px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:164px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:none;font-family:AvantGarde"
      id="text3037"
      font-size="164"
      font-weight="normal"
      font-style="normal"
-     y="3159.2458"
-     x="4638.876"
+     y="3179.291"
+     x="4228.4468"
      xml:space="preserve"><tspan
-       style="font-size:199.00462341px"
-       id="tspan3231">vector, stack, set</tspan></text>
+       style="font-size:199.00462341px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
+       id="tspan3231">vector, stack, set, map</tspan><tspan
+       style="font-size:199.00462341px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
+       id="tspan3257" /></text>
   <text
-     style="font-size:164px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:164px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-anchor:start;fill:#000000;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
      id="text3039"
      font-size="164"
      font-weight="normal"
      font-style="normal"
-     y="3374.7798"
-     x="4497.876"
+     y="3394.825"
+     x="4264.5664"
      xml:space="preserve"><tspan
-       style="font-size:199.00462341px"
-       id="tspan3233">priority_queue, map</tspan></text>
+       style="font-size:199.00462341px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
+       id="tspan3233">priority_queue, matrix</tspan></text>
   <text
-     style="font-size:164px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:164px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-anchor:start;fill:#000000;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
      id="text3041"
      font-size="164"
      font-weight="normal"
      font-style="normal"
-     y="3663.8477"
-     x="4449.876"
+     y="3651.8206"
+     x="4278.5337"
      xml:space="preserve"><tspan
-       style="font-size:199.00462341px"
+       style="font-size:199.00462341px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
        id="tspan3235">sort, for_each, merge</tspan></text>
   <text
-     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:215px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-anchor:start;fill:#000000;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
      id="text3043"
      font-size="215"
      font-weight="normal"
      font-style="normal"
-     y="3326.7798"
-     x="7326.8076"
+     y="3304.3862"
+     x="7151.0605"
      xml:space="preserve"><tspan
-       style="font-size:265.33950806px"
+       style="font-size:265.33950806px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
        id="tspan3237">Pipelined sorting,</tspan></text>
   <text
-     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:215px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-anchor:start;fill:#000000;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
      id="text3045"
      font-size="215"
      font-weight="normal"
      font-style="normal"
-     y="3610.7798"
-     x="7278.8076"
+     y="3592.3953"
+     x="7118.9316"
      xml:space="preserve"><tspan
-       style="font-size:265.33950806px"
+       style="font-size:265.33950806px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;stroke:none;font-family:TeX Gyre Adventor;-inkscape-font-specification:TeX Gyre Adventor"
        id="tspan3239">zero-I/O scanning</tspan></text>
   <text
-     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
+     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
      id="text3047"
      font-size="265"
      font-weight="bold"
      font-style="normal"
-     y="2759.7798"
-     x="8242.4658"
+     y="2775.8159"
+     x="8246.7646"
      xml:space="preserve"><tspan
-       style="font-size:371.47531128px"
+       style="font-size:371.47531128px;stroke:none"
        id="tspan3217">Pipelining</tspan></text>
   <text
-     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:AvantGarde"
+     style="font-size:215px;font-style:normal;font-weight:normal;text-anchor:start;fill:#000000;stroke:none;font-family:AvantGarde"
      id="text3049"
      font-size="215"
      font-weight="normal"
      font-style="normal"
      y="3663.8477"
-     x="2796.8733"
+     x="2599.8601"
      xml:space="preserve"><tspan
-       style="font-size:265.33950806px"
+       style="font-size:265.33950806px;stroke:none"
        id="tspan3229">Algorithms:</tspan></text>
   <text
-     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
+     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
      id="text3051"
      font-size="265"
      font-weight="bold"
      font-style="normal"
      y="7252.4941"
-     x="6027.5342"
+     x="6128.1587"
      xml:space="preserve"><tspan
-       style="font-size:371.47531128px"
+       style="font-size:371.47531128px;stroke:none"
        id="tspan3225">Operating System</tspan></text>
   <text
-     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:#000000;stroke-width:0.025in;font-family:Helvetica"
+     style="font-size:265px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#000000;stroke:none;font-family:Helvetica"
      id="text3053"
      font-size="265"
      font-weight="bold"
      font-style="normal"
-     y="1836.2465"
+     y="1861.7599"
      x="6054.0684"
      xml:space="preserve"><tspan
-       style="font-size:371.47531128px"
+       style="font-size:371.47531128px;stroke:none"
        id="tspan3221">Applications</tspan></text>
 </svg>
diff --git a/doc/images/layer_diagram_small.png b/doc/images/layer_diagram_small.png
index 66dd4f2..cdd5db0 100644
Binary files a/doc/images/layer_diagram_small.png and b/doc/images/layer_diagram_small.png differ
diff --git a/doc/install.dox b/doc/install.dox
index a3b95be..6457010 100644
--- a/doc/install.dox
+++ b/doc/install.dox
@@ -51,7 +51,7 @@ $ git clone http://github.com/stxxl/stxxl.git my-project
 \verbatim
 $ mkdir my-project/build
 $ cd my-project/build
-$ cmake -DCMAKE_BUILD_TYPE=Debug ..
+$ cmake ..
 <lots of output by cmake>
 $ make
 <lots of compilation messages>
@@ -69,6 +69,8 @@ For your own prototype project you can immediately start modifying \c test1.cpp
 
 The CMake file has many build options (see \ref install_build_options). Maybe the most important are \c BUILD_TESTS and \c BUILD_EXAMPLES. By setting them with <tt>"-DBUILD_EXAMPLES=ON -DBUILD_TESTS=ON"</tt> on the CMake line, additional subprojects are added to the build.
 
+By default, STXXL compiles in <tt>Debug</tt> mode and includes many assertions and run-time checks, which typically slow down performance dramatically. To use STXXL at **full speed**, please set the build type to <tt>Release</tt> by adding <tt>-DCMAKE_BUILD_TYPE=Release</tt> to the cmake line. 
+
 \section install_unix_subproject Including STXXL as a CMake Subproject
 
 The second method is for including STXXL in a larger program as a subproject. This is particularly easy with CMake: one can just \c add_directory(stxxl) in a CMakeLists.txt. The following guide shows how to start a simple CMake project and use STXXL in a subdirectory.
@@ -116,7 +118,7 @@ target_link_libraries(project ${STXXL_LIBRARIES})
 $ cp stxxl/local/test1.cpp main.cpp
 $ mkdir build
 $ cd build
-$ cmake -DCMAKE_BUILD_TYPE=Debug ..
+$ cmake ..
 <lots of output by cmake>
 $ make
 <lots of compilation messages>
@@ -160,7 +162,7 @@ See the README at http://github.com/stxxl/myproject
 
 # Create a Disk Configuration File
 
-For STXXL is function beyond very simple examples, you must define the \link install_config disk configuration file \endlink. The simplest method is to create a file named <b><tt>'.stxxl'</tt></b> the same directory as you execute the program. A basic configuration might be:
+For STXXL to function beyond very simple examples, you must define the \link install_config disk configuration file \endlink. The simplest method is to create a file named <b><tt>'.stxxl'</tt></b> in the same directory as you execute the program. A basic configuration might be:
 \verbatim
 # file path,maximum capacity of the disk,access method
 disk=/tmp/stxxl,1G,syscall unlink
@@ -385,7 +387,7 @@ In summary, CMake only looks for Boost at a few locations: C:\\Boost for headers
 
 /** \page install_config Disk Configuration Files
 
-\author Timo Bingmann (2013)
+\author Timo Bingmann (2013-2014)
 
 A main feature of the STXXL is to take advantage of parallel access to <b>multiple disks</b>. For this, you must define the disk configuration in a text file, using the syntax described below. If no file is found at the locations below, STXXL will by default create a 1000 MiB file in \c /var/tmp/stxxl on Unix or in the user's temp directory on Windows.
 
@@ -423,12 +425,12 @@ disk=<path>,<capacity>,<fileio> <options>
 
 Description of the parameters:
 
-- <tt>\<path></tt> : full disk filename.
+- <b><tt>\<path></tt></b> : full disk filename.
   - In order to access disks STXXL uses <i>file-based access methods</i> (see below). Each disk is represented as a file
   - If you have a disk that is mounted in Unix to the path /mnt/disk0/, then the correct value for the \c full_disk_filename would be \c /mnt/disk0/some_file_name. \n
   - If the string contains <tt>"###"</tt> (three '#'), then these symbols are replaced by the current process id.
 
-- <tt>\<capacity></tt> : maximum capacity of the disk
+- <b><tt>\<capacity></tt></b> : maximum capacity of the disk
 
   - the following size suffixes are recognized:
     - \c K, \c M, \c G, \c T, \c P (powers of 10),
@@ -437,11 +439,14 @@ Description of the parameters:
 
   - 0 means autogrow, and the file will be deleted afterwards.
 
-- <tt>\<fileio></tt> : \c STXXL has a number of different file access implementations, choose one of them:
+- <b><tt>\<fileio></tt></b> : \c STXXL has a number of different file access implementations, choose one of them (recommended ones in bold):
 
-  - \c syscall : use \c read and \c write system calls which perform disk transfers directly on user memory pages without superfluous copying (currently the fastest method)
+  - \c **syscall** : use \c read and \c write system calls which perform disk transfers directly on user memory pages without superfluous copying (currently the fastest method)
 
-  - \c wincall : on Windows, use direct calls to the Windows API.
+  - \c **wincall** : on Windows, use direct calls to the Windows API.
+
+  - \c **linuxaio** : on Linux, use direct syscalls to the native Linux AIO interface. \n
+  The Linux AIO interface has the advantage of keeping an asynchronous queue inside the kernel. Multiple I/O requests are submitted to the kernel at once, thus the kernel can sort them using its disk schedulers and also forward them to the actual disks as asynchronous operations using NCQ (native command queuing) or TCQ (tagged command queuing).
 
   - \c memory : keeps all data in RAM, for quicker testing
 
@@ -455,25 +460,30 @@ Description of the parameters:
 
   - \c wbtl : library-based write-combining (good for writing small blocks onto SSDs), based on \c syscall
 
-- <tt>\<options></tt> : additional options for file access implementation. Not all are available for every fileio method. The option order is unimportant.
+- <b><tt>\<options></tt></b> : additional options for file access implementation. Not all are available for every fileio method. The option order is unimportant.
 
   - \c autogrow : enables automatic growth of the file beyond the specified capacity.
 
-  - \c direct, \c nodirect, \c direct=[off/try/on] : disable buffering in system cache by passing O_DIRECT or similar flag to open. \n
+  - \c **direct**, \c nodirect, \c direct=[off/try/on] : disable buffering in system cache by passing O_DIRECT or similar flag to open. \n
     This is \a recommended as it improves performance, however, not all filesystems support bypassing cache. With \c direct or \c direct=on, STXXL will fail without direct access. With \c nodirect or \c direct=off it is disabled. The default is \c direct=try , which first attempts to open with O_DIRECT and falls back to opening without if it fails.
 
-  - \c unlink (or \c unlink_on_open) : unlink the file from the fs immediately after creation. \n
+  - \c **unlink** (or \c unlink_on_open) : unlink the file from the fs immediately after creation. \n
     This is possible on Unix system, as the file descriptor is kept open. This method is \b preferred, because even in the case of a program segfault, the file data is cleaned up by the kernel.
 
-  - \c delete (or \c delete_on_exit) : delete file \a after the STXXL program exists \n
+  - \c **delete** (or \c delete_on_exit) : delete file \a after the STXXL program exits \n
     This is the more conservative version of unlink, which also works on Windows. However, if the program crashes, the file is not deleted.
 
-  - \c raw_device : fail if the opened path is not a raw block device. \n
+  - \c **raw_device** : fail if the opened path is not a raw block device. \n
     This flag is not required, raw devices are automatically detected.
 
   - \c queue=# : assign the disk to a specific I/O request queue and thread. \n
     Use this for multiple files that reside on the same physical disk.
 
+  - \c devid=# : assign the disk entry a specific physical device id. \n
+    Usually you can just omit the devid=# option, since disks are enumerated automatically. In sorting and other prefetched operations, the physical device id is used to schedule block transfers from independent devices. Thus you should label files/disks on the same physical devices with the same devid.
+
+  - \c queue_length=# : for linuxaio, specify the desired length of the request queue inside the Linux kernel.
+
 Example:
 \verbatim
 disk=/data01/stxxl,500G,syscall unlink
@@ -486,6 +496,12 @@ disk=c:\stxxl.tmp,700G,wincall delete
 disk=d:\stxxl.tmp,200G,wincall delete
 \endverbatim
 
+On Linux you can try to take advantage of NCQ + Kernel AIO queues:
+\verbatim
+disk=/data01/stxxl,500G,linuxaio unlink
+disk=/data02/stxxl,300G,linuxaio unlink
+\endverbatim
+
 \section install_config_filesystem Recommended: File System XFS or Raw Block Devices
 
 The library benefits from direct transfers from user memory to disk, which saves superfluous copies.  We recommend to use the <a href="http://xfs.org">XFS  file system</a>, which gives good read and write performance for large files. Note that file creation speed of \c XFS is a bit slower, so that disk files should be precreated for optimal performance.
@@ -536,10 +552,10 @@ int main()
     disk1.direct = stxxl::disk_config::DIRECT_ON; // force O_DIRECT
 
     // add disk to config
-    config->add_disk(disk1);
+    cfg->add_disk(disk1);
 
     // add another disk
-    config->add_disk( disk_config("disk=/tmp/stxxl-2.tmp, 10 GiB, syscall unlink") );
+    cfg->add_disk( disk_config("disk=/tmp/stxxl-2.tmp, 10 GiB, syscall unlink") );
 
     // ... add more disks
 
diff --git a/doc/mainpage.dox b/doc/mainpage.dox
index f07c6c9..33851ed 100644
--- a/doc/mainpage.dox
+++ b/doc/mainpage.dox
@@ -81,7 +81,7 @@ Many people have contributed to STXXL, see all \ref authors.
 - \subpage authors
 - \subpage textfiles_install
 - \subpage license
-- \subpage todo
+- \subpage textfiles_todo
 
 \page readme README
 
@@ -105,9 +105,49 @@ The following list of authors have contributed to STXXL:
 
 \verbinclude LICENSE_1_0.txt
 
-\page todo TODO
+\page textfiles_todo TODO
 
 \verbinclude TODO
 
  */
 
+// Module Groups are defined here to fix their order:
+
+/*! \defgroup stllayer STL-User Layer
+  Layer which groups STL compatible algorithms and containers
+*/
+
+/*! \defgroup streampack Stream Package
+
+  Package that enables pipelining of consecutive sorts and scans of the external data, avoiding saving the intermediate results on the disk, e.g. the output of a sort can be directly fed into a scan procedure without the need to save it on a disk.  All components of the package are contained in the \c stxxl::stream namespace.
+
+   STREAM ALGORITHM CONCEPT (Do not confuse with C++ input/output streams)
+
+\verbatim
+
+   struct stream_algorithm // stream, pipe, whatever
+   {
+     typedef some_type value_type;
+
+     const value_type & operator * () const; // return current element of the stream
+     stream_algorithm & operator ++ ();      // go to next element. precondition: empty() == false
+     bool empty() const;                     // return true if end of stream is reached
+
+   };
+\endverbatim
+*/
+
+/*! \defgroup mnglayer Block Management Layer
+
+  Group of classes which help controlling external memory space, managing disks, and allocating and deallocating blocks of external storage.
+*/
+
+/*! \defgroup iolayer I/O Primitives Layer
+
+  Group of classes which enable abstraction from operating system calls and support system-independent interfaces for asynchronous I/O.
+*/
+
+/*! \defgroup support Common Utilities and Support Classes
+
+Supporting classes also useful for applications, see also \ref common .
+*/
diff --git a/doc/tutorial.dox b/doc/tutorial.dox
index 5dcf22a..ffd747c 100644
--- a/doc/tutorial.dox
+++ b/doc/tutorial.dox
@@ -39,6 +39,7 @@ The practical part about it: STXXL containers which have an internal counterpart
 - \subpage tutorial_queue "stxxl::queue tutorial"
 - \subpage tutorial_deque "stxxl::deque tutorial"
 - \subpage tutorial_map "stxxl::map tutorial"
+- \subpage tutorial_unordered_map "stxxl::unordered_map tutorial"
 
 Beyond these, STXXL also provides a set of containers that are not part of the STL:
 
diff --git a/doc/tutorial_unordered_map.dox b/doc/tutorial_unordered_map.dox
new file mode 100644
index 0000000..af0b86d
--- /dev/null
+++ b/doc/tutorial_unordered_map.dox
@@ -0,0 +1,59 @@
+// -*- mode: c++; mode: visual-line; mode: flyspell; fill-column: 100000 -*-
+/***************************************************************************
+ *  doc/tutorial_unordered_map.dox
+ *
+ *  Usage Tutorial for STXXL
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+ *  Copyright (C) 2013 Daniel Feist <daniel.feist at student.kit.edu>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+namespace stxxl {
+
+/** \page tutorial_unordered_map STXXL Unordered Map (Hash Map)
+
+This page introduces the **EXPERIMENTAL** stxxl::unordered_map which can be used in lieu of std::unordered_map (for further information on the interface, refer to the API \ref stxxl::unordered_map).
+
+stxxl::unordered_map is an external memory hash map that stores elements formed by a combination of a unique key value and a data value, without any specific order. The main problem is that a hash map **ITSELF IS NOT VERY EFFICIENT** in external memory, since access to an element requires a random access to disk. **PLEASE CHECK** whether an ordered sequence, as provided by stxxl::map, may not be the better replacement for your application.  However, if you are willing to provide **a lot  [...]
+
+The implementation of the unordered hash_map is experimental, and help with improving it, fixing bugs and writing documentation for it is very welcome. If you have an application, please consider **THOROUGHLY TESTING** the implementation and patching problems.
+
+### Creating a STXXL Unordered Map
+
+To create a stxxl::unordered_map object, several template parameters are required. The first two parameters KeyType and MappedType, which are combined into a std::pair<int, char> in this example, are self-explanatory, the third parameter is a *hasher class* and the fourth has to be a *comparator class* which is used to determine whether a key is smaller than another one, the fifth and sixth parameters define the subblock- and block size (in subblock items).
+\snippet examples/containers/unordered_map1.cpp construction
+
+The hash function follows the standard std::hash signature, and returns a size_t:
+\snippet examples/containers/unordered_map1.cpp hash
+
+Instead of the **equality comparator** as required by the C++ standard, we require a **less comparator**, because the unordered_map **sorts** bulk insertions by hash value. A simple comparator looks like:
+\snippet examples/containers/unordered_map1.cpp comparator
+
+After construction, the standard operations of an unordered map are available as one would expect; see below for a short example of some functions.
+
+### Additional Implementation Notes
+
+ * The implementation contains some TODO items very relevant to performance. A potential heavy user should consider fixing these.
+
+ * Like the btree, the unordered_map must keep an iterator map for updating items when they are swapped out to disk.
+
+TODO: write more information.
+
+### A minimal working example on STXXL Unordered Map
+
+(See \ref examples/containers/unordered_map1.cpp for the sourcecode of the following example).
+
+\snippet examples/containers/unordered_map1.cpp example
+
+\example examples/containers/unordered_map1.cpp
+This example code is explained in the \ref tutorial_unordered_map section
+
+*/
+
+} // namespace stxxl
diff --git a/examples/algo/copy_and_sort_file.cpp b/examples/algo/copy_and_sort_file.cpp
index 2898a98..13d45da 100644
--- a/examples/algo/copy_and_sort_file.cpp
+++ b/examples/algo/copy_and_sort_file.cpp
@@ -23,20 +23,15 @@
 #include <stxxl/vector>
 #include <stxxl/stream>
 
-
 struct my_type
 {
     typedef unsigned key_type;
 
-    key_type _key;
-    char _data[128 - sizeof(key_type)];
-    key_type key() const
-    {
-        return _key;
-    }
+    key_type m_key;
+    char m_data[128 - sizeof(key_type)];
 
     my_type() { }
-    my_type(key_type __key) : _key(__key) { }
+    my_type(key_type k) : m_key(k) { }
 
     static my_type min_value()
     {
@@ -48,15 +43,14 @@ struct my_type
     }
 };
 
-
 inline bool operator < (const my_type& a, const my_type& b)
 {
-    return a.key() < b.key();
+    return a.m_key < b.m_key;
 }
 
 inline bool operator == (const my_type& a, const my_type& b)
 {
-    return a.key() == b.key();
+    return a.m_key == b.m_key;
 }
 
 struct Cmp
@@ -80,11 +74,10 @@ struct Cmp
 
 std::ostream& operator << (std::ostream& o, const my_type& obj)
 {
-    o << obj._key;
+    o << obj.m_key;
     return o;
 }
 
-
 int main(int argc, char** argv)
 {
     if (argc < 3)
@@ -93,8 +86,8 @@ int main(int argc, char** argv)
         return -1;
     }
 
-    const unsigned memory_to_use = 512 * 1024 * 1024;
-    const unsigned int block_size = sizeof(my_type) * 4096;
+    const stxxl::internal_size_type memory_to_use = 512 * 1024 * 1024;
+    const stxxl::internal_size_type block_size = sizeof(my_type) * 4096;
 
     typedef stxxl::vector<my_type, 1, stxxl::lru_pager<2>, block_size> vector_type;
 
@@ -104,11 +97,7 @@ int main(int argc, char** argv)
     vector_type output(&out_file);
     output.resize(input.size());
 
-#if STXXL_MSVC
     typedef stxxl::stream::streamify_traits<vector_type::iterator>::stream_type input_stream_type;
-#else
-    typedef __typeof__ (stxxl::stream::streamify(input.begin(), input.end())) input_stream_type;
-#endif
     input_stream_type input_stream = stxxl::stream::streamify(input.begin(), input.end());
 
     typedef Cmp comparator_type;
diff --git a/examples/algo/phonebills.cpp b/examples/algo/phonebills.cpp
index 2fc7546..8921aee 100644
--- a/examples/algo/phonebills.cpp
+++ b/examples/algo/phonebills.cpp
@@ -119,7 +119,6 @@ struct SortByCaller
     }
 };
 
-
 void print_usage(const char* program)
 {
     std::cout << "Usage: " << program << " logfile main billfile" << std::endl;
@@ -160,7 +159,7 @@ int main(int argc, char* argv[])
     std::for_each(v.begin(), v.end(), ProduceBill(out));
 
 #else
-    const unsigned M = atol(argv[2]) * 1024 * 1024;
+    const stxxl::internal_size_type M = atol(argv[2]) * 1024 * 1024;
 
     stxxl::sort(v.begin(), v.end(), SortByCaller(), M);
     std::fstream out(argv[3], std::ios::out);
diff --git a/examples/algo/phonebills_genlog.cpp b/examples/algo/phonebills_genlog.cpp
index e1fc9ae..996ac1c 100644
--- a/examples/algo/phonebills_genlog.cpp
+++ b/examples/algo/phonebills_genlog.cpp
@@ -55,7 +55,6 @@ std::ostream& operator << (std::ostream& i, const LogEntry& entry)
     return i;
 }
 
-
 int main(int argc, char* argv[])
 {
     if (argc < 5)
@@ -68,9 +67,9 @@ int main(int argc, char* argv[])
 
         return 0;
     }
-    unsigned M = atol(argv[3]) * 1024 * 1024;
+    stxxl::internal_size_type M = atol(argv[3]) * 1024 * 1024;
     const stxxl::uint64 ncalls = stxxl::atouint64(argv[1]);
-    const int av_calls = atol(argv[2]);
+    const long av_calls = atol(argv[2]);
     const stxxl::uint64 nclients = ncalls / av_calls;
     stxxl::uint64 calls_made = 0;
 
@@ -93,7 +92,7 @@ int main(int argc, char* argv[])
 
         while (serv-- > 0)
         {
-            cur += 1 + rnd(3600 * 24);
+            cur += (time_t)(1 + rnd(3600 * 24));
 
             e.to = rnd(nclients);
             e.timestamp = cur;
@@ -102,7 +101,7 @@ int main(int argc, char* argv[])
             e.event = 1;
             log.push_back(e);
 
-            cur += 1 + rnd(1800);
+            cur += (time_t)(1 + rnd(1800));
             e.timestamp = cur;
             e.event = 2;
 
diff --git a/examples/algo/sort_file.cpp b/examples/algo/sort_file.cpp
index 9181852..74c6aa2 100644
--- a/examples/algo/sort_file.cpp
+++ b/examples/algo/sort_file.cpp
@@ -22,20 +22,20 @@
 #include <stxxl/stable_ksort>
 #include <stxxl/vector>
 
-
 struct my_type
 {
     typedef unsigned key_type;
 
-    key_type _key;
-    char _data[128 - sizeof(key_type)];
+    key_type m_key;
+    char m_data[128 - sizeof(key_type)];
+
     key_type key() const
     {
-        return _key;
+        return m_key;
     }
 
     my_type() { }
-    my_type(key_type __key) : _key(__key) { }
+    my_type(key_type k) : m_key(k) { }
 
     static my_type min_value()
     {
@@ -47,7 +47,6 @@ struct my_type
     }
 };
 
-
 inline bool operator < (const my_type& a, const my_type& b)
 {
     return a.key() < b.key();
@@ -79,7 +78,7 @@ struct Cmp
 
 std::ostream& operator << (std::ostream& o, const my_type& obj)
 {
-    o << obj._key;
+    o << obj.key();
     return o;
 }
 
@@ -92,24 +91,25 @@ int main(int argc, char** argv)
         return -1;
     }
 
-    const unsigned int block_size = sizeof(my_type) * 4096;
+    const stxxl::unsigned_type block_size = sizeof(my_type) * 4096;
+
     if (strcmp(argv[1], "generate") == 0) {
         const my_type::key_type num_elements = 1 * 1024 * 1024;
-        const unsigned int records_in_block = block_size / sizeof(my_type);
+        const stxxl::unsigned_type records_in_block = block_size / sizeof(my_type);
         stxxl::syscall_file f(argv[2], stxxl::file::CREAT | stxxl::file::RDWR);
-        my_type* array = (my_type*)stxxl::aligned_alloc<BLOCK_ALIGN>(block_size);
+        my_type* array = (my_type*)stxxl::aligned_alloc<STXXL_BLOCK_ALIGN>(block_size);
         memset(array, 0, block_size);
 
         my_type::key_type cur_key = num_elements;
         for (unsigned i = 0; i < num_elements / records_in_block; i++)
         {
             for (unsigned j = 0; j < records_in_block; j++)
-                array[j]._key = cur_key--;
+                array[j].m_key = cur_key--;
 
-            stxxl::request_ptr req = f.awrite((void*)array, stxxl::int64(i) * block_size, block_size, stxxl::default_completion_handler());
+            stxxl::request_ptr req = f.awrite((void*)array, stxxl::int64(i) * block_size, block_size);
             req->wait();
         }
-        stxxl::aligned_dealloc<BLOCK_ALIGN>(array);
+        stxxl::aligned_dealloc<STXXL_BLOCK_ALIGN>(array);
     } else {
 #if STXXL_PARALLEL_MULTIWAY_MERGE
         STXXL_MSG("STXXL_PARALLEL_MULTIWAY_MERGE");
diff --git a/examples/applications/skew3.cpp b/examples/applications/skew3.cpp
index b4a6824..3ca610d 100644
--- a/examples/applications/skew3.cpp
+++ b/examples/applications/skew3.cpp
@@ -38,11 +38,12 @@
 #include <stxxl/bits/common/uint_types.h>
 
 using stxxl::uint64;
+using stxxl::internal_size_type;
 using stxxl::external_size_type;
 namespace stream = stxxl::stream;
 
 // 1 GiB ram used by external data structures / 1 MiB block size
-uint64 ram_use = 1024 * 1024 * 1024;
+internal_size_type ram_use = 1024 * 1024 * 1024;
 
 // alphabet data type
 typedef unsigned char alphabet_type;
@@ -274,7 +275,6 @@ public:
         static value_type max_value() { return value_type::max_value(); }
     };
 
-
     /** Check, if last two components of tree quads are equal. */
     template <class quad_type>
     static inline bool quad_eq(const quad_type& a, const quad_type& b)
@@ -300,8 +300,8 @@ public:
         skew_pair_type result;
 
     public:
-        naming(Input& A_, bool& unique_) :
-            A(A_), unique(unique_), lexname(0)
+        naming(Input& A_, bool& unique_)
+            : A(A_), unique(unique_), lexname(0)
         {
             assert(!A.empty());
             unique = true;
@@ -391,7 +391,6 @@ public:
         { return (A.empty() || B.empty()); }
     };
 
-
     /**
      * Collect three characters t_i, t_{i+1}, t_{i+2} beginning at the index
      * i. Since we need at least one unique endcaracter, we free the first
@@ -541,7 +540,6 @@ public:
         { return A.empty(); }
     };
 
-
     /** Create the suffix array from the current sub problem by simple
      *  comparison-based merging.  More precisely: compare characters(out of
      *  text t) and ranks(out of ISA12) of the following constellation:
@@ -712,9 +710,8 @@ public:
         }
     };
 
-
     /** Helper function for computing the size of the 2/3 subproblem. */
-    static inline size_t subp_size(size_t n)
+    static inline size_type subp_size(size_type n)
     {
         return (n / 3) * 2 + ((n % 3) == 2);
     }
@@ -791,7 +788,7 @@ public:
         value_type result;
 
     public:
-        build_sa(S& source_, Mod1& mod_1_, Mod2& mod_2_, size_t a_size, size_t memsize)
+        build_sa(S& source_, Mod1& mod_1_, Mod2& mod_2_, size_type a_size, size_t memsize)
             : source(source_), mod_1(mod_1_), mod_2(mod_2_), index(0), ready(false)
         {
             assert(!source_.empty());
diff --git a/examples/containers/CMakeLists.txt b/examples/containers/CMakeLists.txt
index f12fd6a..12fd3d0 100644
--- a/examples/containers/CMakeLists.txt
+++ b/examples/containers/CMakeLists.txt
@@ -3,7 +3,7 @@
 #
 #  Part of the STXXL. See http://stxxl.sourceforge.net
 #
-#  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+#  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
 #
 #  Distributed under the Boost Software License, Version 1.0.
 #  (See accompanying file LICENSE_1_0.txt or copy at
@@ -24,6 +24,7 @@ stxxl_build_example(sorter1)
 stxxl_build_example(sorter2)
 stxxl_build_example(stack1)
 stxxl_build_example(stack2)
+stxxl_build_example(unordered_map1)
 stxxl_build_example(vector1)
 stxxl_build_example(vector2)
 stxxl_build_example(vector_buf)
@@ -41,6 +42,7 @@ stxxl_test(sorter1)
 stxxl_test(sorter2)
 stxxl_test(stack1)
 stxxl_test(stack2)
+stxxl_test(unordered_map1)
 stxxl_test(vector1)
 stxxl_test(vector2)
 stxxl_test(vector_buf)
diff --git a/examples/containers/deque2.cpp b/examples/containers/deque2.cpp
index ab799b5..3ffc988 100644
--- a/examples/containers/deque2.cpp
+++ b/examples/containers/deque2.cpp
@@ -34,7 +34,7 @@ int main()
     stxxl::deque_iterator<deque> deque_iterator = my_deque.begin();
 
     // Access random element x at position p(x) in the deque
-    p = rand32() % number_of_elements;
+    p = (unsigned int)(rand32() % number_of_elements);
     x = my_deque[p];
 
     // Count number of smaller elements from the front to p(x) - 1
diff --git a/examples/containers/unordered_map1.cpp b/examples/containers/unordered_map1.cpp
new file mode 100644
index 0000000..961c3af
--- /dev/null
+++ b/examples/containers/unordered_map1.cpp
@@ -0,0 +1,89 @@
+/***************************************************************************
+ *  examples/containers/unordered_map1.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2013 Daniel Feist <daniel.feist at student.kit.edu>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+//! [example]
+#include <stxxl/unordered_map>
+#include <iostream>
+
+//! [hash]
+struct HashFunctor
+{
+    size_t operator () (int key) const
+    {
+        // a simple integer hash function
+        return (size_t)(key * 2654435761u);
+    }
+};
+//! [hash]
+
+//! [comparator]
+struct CompareLess
+{
+    bool operator () (const int& a, const int& b) const
+    { return a < b; }
+
+    static int min_value() { return std::numeric_limits<int>::min(); }
+    static int max_value() { return std::numeric_limits<int>::max(); }
+};
+//! [comparator]
+
+int main()
+{
+//! [construction]
+#define SUB_BLOCK_SIZE 8192
+#define SUB_BLOCKS_PER_BLOCK 256
+
+    // template parameter <KeyType, MappedType, HashType, CompareType, SubBlockSize, SubBlocksPerBlock>
+    typedef stxxl::unordered_map<
+            int, char, HashFunctor, CompareLess, SUB_BLOCK_SIZE, SUB_BLOCKS_PER_BLOCK
+            > unordered_map_type;
+
+    // constructor: use defaults for all parameters
+    unordered_map_type my_map;
+//! [construction]
+
+    // insert some items and delete one
+    my_map.insert(std::make_pair(1, 'a'));
+    my_map.insert(std::make_pair(2, 'b'));
+    my_map.insert(std::make_pair(3, 'c'));
+    my_map.insert(std::make_pair(4, 'd'));
+
+    my_map.erase(3);
+
+    // iterate over all items in the unordered_map
+    unordered_map_type::iterator iter;
+
+    std::cout << "my_map contains:\n";
+    for (iter = my_map.begin(); iter != my_map.end(); ++iter)
+    {
+        std::cout << iter->first << " => " << iter->second << std::endl;
+    }
+
+    // direct operator[] access to items
+    std::cout << "my_map[2] = " << my_map[2] << std::endl;
+
+    // efficient bulk-insert into hash map by sorting by hash keys
+    std::vector<unordered_map_type::value_type> value_array;
+
+    for (int i = 0; i < 128; ++i)
+        value_array.push_back(std::make_pair(i, (char)i));
+
+    my_map.insert(value_array.begin(), value_array.end(), 8 * 1024 * 1024);
+
+    // check results of insertion
+    std::cout << "my_map[42] = " << my_map[42] << std::endl;
+    std::cout << "my_map.size() = " << my_map.size() << std::endl;
+
+    return 0;
+}
+//! [example]
diff --git a/include/stxxl.h b/include/stxxl.h
index 9ccfc74..790891b 100644
--- a/include/stxxl.h
+++ b/include/stxxl.h
@@ -26,6 +26,7 @@
 // map does not work with g++ 3.3
 #include <stxxl/map>
 #endif
+#include <stxxl/unordered_map>
 #include <stxxl/queue>
 #include <stxxl/deque>
 
diff --git a/include/stxxl/bits/algo/adaptor.h b/include/stxxl/bits/algo/adaptor.h
index daf4840..6e494b0 100644
--- a/include/stxxl/bits/algo/adaptor.h
+++ b/include/stxxl/bits/algo/adaptor.h
@@ -5,6 +5,7 @@
  *
  *  Copyright (C) 2002 Roman Dementiev <dementiev at mpi-sb.mpg.de>
  *  Copyright (C) 2010 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -17,31 +18,32 @@
 #include <stxxl/bits/mng/bid.h>
 #include <stxxl/bits/mng/adaptor.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <unsigned _blk_sz, typename _run_type, class __pos_type = int_type>
-struct runs2bid_array_adaptor : public two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type>
+template <unsigned BlockSize, typename RunType, class PosType = int_type>
+struct runs2bid_array_adaptor : public two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType>
 {
-    typedef runs2bid_array_adaptor<_blk_sz, _run_type, __pos_type> _Self;
-    typedef BID<_blk_sz> data_type;
+    typedef runs2bid_array_adaptor<BlockSize, RunType, PosType> self_type;
+    typedef BID<BlockSize> data_type;
 
-    enum    { block_size = _blk_sz };
+    enum {
+        block_size = BlockSize
+    };
 
     unsigned_type dim_size;
 
-    typedef two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type> _Parent;
-    using _Parent::array;
-    using _Parent::pos;
+    typedef two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType> parent_type;
+    using parent_type::array;
+    using parent_type::pos;
 
-    runs2bid_array_adaptor(_run_type** a, __pos_type p, unsigned_type d)
-        : two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type>(a, p), dim_size(d)
+    runs2bid_array_adaptor(RunType** a, PosType p, unsigned_type d)
+        : two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType>(a, p), dim_size(d)
     { }
-    runs2bid_array_adaptor(const _Self& a)
-        : two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type>(a), dim_size(a.dim_size)
+    runs2bid_array_adaptor(const self_type& a)
+        : two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType>(a), dim_size(a.dim_size)
     { }
 
-    const _Self& operator = (const _Self& a)
+    const self_type& operator = (const self_type& a)
     {
         array = a.array;
         pos = a.pos;
@@ -52,7 +54,7 @@ struct runs2bid_array_adaptor : public two2one_dim_array_adapter_base<_run_type*
     data_type& operator * ()
     {
         CHECK_RUN_BOUNDS(pos);
-        return (BID<_blk_sz>&)((*(array[(pos) % dim_size]))[(pos) / dim_size].bid);
+        return (BID<BlockSize>&)((*(array[(pos) % dim_size]))[(pos) / dim_size].bid);
     }
 
     const data_type* operator -> () const
@@ -61,45 +63,45 @@ struct runs2bid_array_adaptor : public two2one_dim_array_adapter_base<_run_type*
         return &((*(array[(pos) % dim_size])[(pos) / dim_size].bid));
     }
 
-
-    data_type& operator [] (__pos_type n) const
+    data_type& operator [] (PosType n) const
     {
         n += pos;
         CHECK_RUN_BOUNDS(n);
-        return (BID<_blk_sz>&)((*(array[(n) % dim_size]))[(n) / dim_size].bid);
+        return (BID<BlockSize>&)((*(array[(n) % dim_size]))[(n) / dim_size].bid);
     }
 };
 
 BLOCK_ADAPTOR_OPERATORS(runs2bid_array_adaptor)
 
-template <unsigned _blk_sz, typename _run_type, class __pos_type = int_type>
+template <unsigned BlockSize, typename RunType, class PosType = int_type>
 struct runs2bid_array_adaptor2
-    : public two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type>
+    : public two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType>
 {
-    typedef runs2bid_array_adaptor2<_blk_sz, _run_type, __pos_type> _Self;
-    typedef BID<_blk_sz> data_type;
+    typedef runs2bid_array_adaptor2<BlockSize, RunType, PosType> self_type;
+    typedef BID<BlockSize> data_type;
 
-    typedef two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type> ParentClass_;
+    typedef two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType> base_type;
 
-    using ParentClass_::pos;
-    using ParentClass_::array;
+    using base_type::pos;
+    using base_type::array;
 
-    enum
-    { block_size = _blk_sz };
+    enum {
+        block_size = BlockSize
+    };
 
-    __pos_type w, h, K;
+    PosType w, h, K;
 
-    runs2bid_array_adaptor2(_run_type** a, __pos_type p, int_type _w, int_type _h)
-        : two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type>(a, p),
+    runs2bid_array_adaptor2(RunType** a, PosType p, int_type _w, int_type _h)
+        : two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType>(a, p),
           w(_w), h(_h), K(_w * _h)
     { }
 
-    runs2bid_array_adaptor2(const _Self& a)
-        : two2one_dim_array_adapter_base<_run_type*, BID<_blk_sz>, __pos_type>(a),
+    runs2bid_array_adaptor2(const self_type& a)
+        : two2one_dim_array_adapter_base<RunType*, BID<BlockSize>, PosType>(a),
           w(a.w), h(a.h), K(a.K)
     { }
 
-    const _Self& operator = (const _Self& a)
+    const self_type& operator = (const self_type& a)
     {
         array = a.array;
         pos = a.pos;
@@ -111,49 +113,45 @@ struct runs2bid_array_adaptor2
 
     data_type& operator * ()
     {
-        register __pos_type i = pos - K;
+        PosType i = pos - K;
         if (i < 0)
-            return (BID<_blk_sz>&)((*(array[(pos) % w]))[(pos) / w].bid);
+            return (BID<BlockSize>&)((*(array[(pos) % w]))[(pos) / w].bid);
 
-        register __pos_type _w = w;
+        PosType _w = w;
         _w--;
-        return (BID<_blk_sz>&)((*(array[(i) % _w]))[h + (i / _w)].bid);
+        return (BID<BlockSize>&)((*(array[(i) % _w]))[h + (i / _w)].bid);
     }
 
     const data_type* operator -> () const
     {
-        register __pos_type i = pos - K;
+        PosType i = pos - K;
         if (i < 0)
             return &((*(array[(pos) % w])[(pos) / w].bid));
 
-
-        register __pos_type _w = w;
+        PosType _w = w;
         _w--;
         return &((*(array[(i) % _w])[h + (i / _w)].bid));
     }
 
-
-    data_type& operator [] (__pos_type n) const
+    data_type& operator [] (PosType n) const
     {
         n += pos;
-        register __pos_type i = n - K;
+        PosType i = n - K;
         if (i < 0)
-            return (BID<_blk_sz>&)((*(array[(n) % w]))[(n) / w].bid);
-
+            return (BID<BlockSize>&)((*(array[(n) % w]))[(n) / w].bid);
 
-        register __pos_type _w = w;
+        PosType _w = w;
         _w--;
-        return (BID<_blk_sz>&)((*(array[(i) % _w]))[h + (i / _w)].bid);
+        return (BID<BlockSize>&)((*(array[(i) % _w]))[h + (i / _w)].bid);
     }
 };
 
 BLOCK_ADAPTOR_OPERATORS(runs2bid_array_adaptor2)
 
-
 template <typename trigger_iterator_type>
 struct trigger_entry_iterator
 {
-    typedef trigger_entry_iterator<trigger_iterator_type> _Self;
+    typedef trigger_entry_iterator<trigger_iterator_type> self_type;
     typedef typename std::iterator_traits<trigger_iterator_type>::value_type::bid_type bid_type;
 
     // STL typedefs
@@ -165,7 +163,7 @@ struct trigger_entry_iterator
 
     trigger_iterator_type value;
 
-    trigger_entry_iterator(const _Self& a) : value(a.value) { }
+    trigger_entry_iterator(const self_type& a) : value(a.value) { }
     trigger_entry_iterator(trigger_iterator_type v) : value(v) { }
 
     bid_type& operator * ()
@@ -185,51 +183,51 @@ struct trigger_entry_iterator
         return (value + n)->bid;
     }
 
-    _Self& operator ++ ()
+    self_type& operator ++ ()
     {
         value++;
         return *this;
     }
-    _Self operator ++ (int)
+    self_type operator ++ (int)
     {
-        _Self __tmp = *this;
+        self_type tmp = *this;
         value++;
-        return __tmp;
+        return tmp;
     }
-    _Self& operator -- ()
+    self_type& operator -- ()
     {
         value--;
         return *this;
     }
-    _Self operator -- (int)
+    self_type operator -- (int)
     {
-        _Self __tmp = *this;
+        self_type tmp = *this;
         value--;
-        return __tmp;
+        return tmp;
     }
-    bool operator == (const _Self& a) const
+    bool operator == (const self_type& a) const
     {
         return value == a.value;
     }
-    bool operator != (const _Self& a) const
+    bool operator != (const self_type& a) const
     {
         return value != a.value;
     }
-    _Self operator += (int_type n)
+    self_type operator += (int_type n)
     {
         value += n;
         return *this;
     }
-    _Self operator -= (int_type n)
+    self_type operator -= (int_type n)
     {
         value -= n;
         return *this;
     }
-    int_type operator - (const _Self& a) const
+    int_type operator - (const self_type& a) const
     {
         return value - a.value;
     }
-    int_type operator + (const _Self& a) const
+    int_type operator + (const self_type& a) const
     {
         return value + a.value;
     }
diff --git a/include/stxxl/bits/algo/async_schedule.h b/include/stxxl/bits/algo/async_schedule.h
index 879b4b9..ed15f05 100644
--- a/include/stxxl/bits/algo/async_schedule.h
+++ b/include/stxxl/bits/algo/async_schedule.h
@@ -20,6 +20,7 @@
 // DOI: 10.1137/S0097539703431573
 
 #include <stxxl/bits/common/types.h>
+#include <stxxl/bits/common/simple_vector.h>
 #include <stxxl/bits/namespace.h>
 
 STXXL_BEGIN_NAMESPACE
@@ -41,36 +42,34 @@ inline void compute_prefetch_schedule(
     compute_prefetch_schedule(static_cast<const int_type*>(first), last, out_first, m, D);
 }
 
-template <typename run_type>
+template <typename RunType>
 void compute_prefetch_schedule(
-    const run_type& input,
+    const RunType& input,
     int_type* out_first,
     int_type m,
     int_type D)
 {
     const int_type L = input.size();
-    int_type* disks = new int_type[L];
+    simple_vector<int_type> disks(L);
     for (int_type i = 0; i < L; ++i)
-        disks[i] = input[i].bid.storage->get_physical_device_id();
-    compute_prefetch_schedule(disks, disks + L, out_first, m, D);
-    delete[] disks;
+        disks[i] = input[i].bid.storage->get_device_id();
+    compute_prefetch_schedule(disks.begin(), disks.end(), out_first, m, D);
 }
 
-template <typename bid_iterator_type>
+template <typename BidIteratorType>
 void compute_prefetch_schedule(
-    bid_iterator_type input_begin,
-    bid_iterator_type input_end,
+    BidIteratorType input_begin,
+    BidIteratorType input_end,
     int_type* out_first,
     int_type m,
     int_type D)
 {
     const int_type L = input_end - input_begin;
-    int_type* disks = new int_type[L];
+    simple_vector<int_type> disks(L);
     int_type i = 0;
-    for (bid_iterator_type it = input_begin; it != input_end; ++it, ++i)
-        disks[i] = it->storage->get_physical_device_id();
-    compute_prefetch_schedule(disks, disks + L, out_first, m, D);
-    delete[] disks;
+    for (BidIteratorType it = input_begin; it != input_end; ++it, ++i)
+        disks[i] = it->storage->get_device_id();
+    compute_prefetch_schedule(disks.begin(), disks.end(), out_first, m, D);
 }
 
 STXXL_END_NAMESPACE
diff --git a/include/stxxl/bits/algo/inmemsort.h b/include/stxxl/bits/algo/inmemsort.h
index abe79e8..3f260c8 100644
--- a/include/stxxl/bits/algo/inmemsort.h
+++ b/include/stxxl/bits/algo/inmemsort.h
@@ -23,13 +23,12 @@
 
 #include <algorithm>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <typename ExtIterator_, typename StrictWeakOrdering_>
-void stl_in_memory_sort(ExtIterator_ first, ExtIterator_ last, StrictWeakOrdering_ cmp)
+template <typename ExtIterator, typename StrictWeakOrdering>
+void stl_in_memory_sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp)
 {
-    typedef typename ExtIterator_::block_type block_type;
+    typedef typename ExtIterator::block_type block_type;
 
     STXXL_VERBOSE("stl_in_memory_sort, range: " << (last - first));
     first.flush();
@@ -41,7 +40,6 @@ void stl_in_memory_sort(ExtIterator_ first, ExtIterator_ last, StrictWeakOrderin
     for (i = 0; i < nblocks; ++i)
         reqs[i] = blocks[i].read(*(first.bid() + i));
 
-
     wait_all(reqs.begin(), nblocks);
 
     unsigned_type last_block_correction = last.block_offset() ? (block_type::size - last.block_offset()) : 0;
@@ -57,7 +55,6 @@ void stl_in_memory_sort(ExtIterator_ first, ExtIterator_ last, StrictWeakOrderin
     wait_all(reqs.begin(), nblocks);
 }
 
-
 STXXL_END_NAMESPACE
 
 #endif // !STXXL_ALGO_INMEMSORT_HEADER
diff --git a/include/stxxl/bits/algo/intksort.h b/include/stxxl/bits/algo/intksort.h
index 7d9504b..7e264b2 100644
--- a/include/stxxl/bits/algo/intksort.h
+++ b/include/stxxl/bits/algo/intksort.h
@@ -19,21 +19,20 @@
 #include <stxxl/bits/unused.h>
 #include <stxxl/bits/parallel.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <typename type_key>
+template <typename TypeKey>
 static void
-count(type_key* a, type_key* aEnd, int_type* bucket, int_type K, typename type_key::key_type offset,
-      unsigned shift)
+count(TypeKey* a, TypeKey* aEnd, int_type* bucket, int_type K,
+      typename TypeKey::key_type offset, unsigned shift)
 {
     // reset buckets
     std::fill(bucket, bucket + K, 0);
 
     // count occupancies
-    for (type_key* p = a; p < aEnd; p++)
+    for (TypeKey* p = a; p < aEnd; p++)
     {
-        int_type i = (p->key - offset) >> shift;
+        int_type i = (int_type)((p->key - offset) >> shift);
         /*
         if (!(i < K && i >= 0))
         {
@@ -45,7 +44,6 @@ count(type_key* a, type_key* aEnd, int_type* bucket, int_type K, typename type_k
     }
 }
 
-
 static inline void
 exclusive_prefix_sum(int_type* bucket, int_type K)
 {
@@ -58,35 +56,34 @@ exclusive_prefix_sum(int_type* bucket, int_type K)
     }
 }
 
-
 // distribute input a to output b using bucket for the starting indices
-template <typename type_key>
+template <typename TypeKey>
 static void
-classify(type_key* a, type_key* aEnd, type_key* b, int_type* bucket, typename type_key::key_type offset, unsigned shift)
+classify(TypeKey* a, TypeKey* aEnd, TypeKey* b, int_type* bucket,
+         typename TypeKey::key_type offset, unsigned shift)
 {
-    for (type_key* p = a; p < aEnd; p++)
+    for (TypeKey* p = a; p < aEnd; p++)
     {
-        int_type i = (p->key - offset) >> shift;
+        int_type i = (int_type)((p->key - offset) >> shift);
         int_type bi = bucket[i];
         b[bi] = *p;
         bucket[i] = bi + 1;
     }
 }
 
-
-template <class T>
+template <class Type>
 inline void
-sort2(T& a, T& b)
+sort2(Type& a, Type& b)
 {
     if (b < a)
         std::swap(a, b);
 }
 
-template <class T>
+template <class Type>
 inline void
-sort3(T& a, T& b, T& c)
+sort3(Type& a, Type& b, Type& c)
 {
-    T temp;
+    Type temp;
     if (b < a)
     {
         if (c < a)
@@ -128,10 +125,9 @@ sort3(T& a, T& b, T& c)
     // Assert1 (!(b < a) && !(c < b));
 }
 
-
-template <class T>
+template <class Type>
 inline void
-sort4(T& a, T& b, T& c, T& d)
+sort4(Type& a, Type& b, Type& c, Type& d)
 {
     sort2(a, b);
     sort2(c, d);                // a < b ; c < d
@@ -146,7 +142,7 @@ sort4(T& a, T& b, T& c, T& d)
         {                       // c < a < {db}
             if (d < b)
             {                   // c < a < d < b
-                T temp = a;
+                Type temp = a;
                 a = c;
                 c = d;
                 d = b;
@@ -154,7 +150,7 @@ sort4(T& a, T& b, T& c, T& d)
             }
             else
             {                   // c < a < b < d
-                T temp = a;
+                Type temp = a;
                 a = c;
                 c = b;
                 b = temp;
@@ -167,7 +163,7 @@ sort4(T& a, T& b, T& c, T& d)
         {                       // c < (bd)
             if (d < b)
             {                   // c < d < b
-                T temp = b;
+                Type temp = b;
                 b = c;
                 c = d;
                 d = temp;
@@ -181,10 +177,9 @@ sort4(T& a, T& b, T& c, T& d)
     //Assert1 (!(b < a) && !(c < b) & !(d < c));
 }
 
-
-template <class T>
+template <class Type>
 inline void
-sort5(T& a, T& b, T& c, T& d, T& e)
+sort5(Type& a, Type& b, Type& c, Type& d, Type& e)
 {
     sort2(a, b);
     sort2(d, e);
@@ -207,7 +202,7 @@ sort5(T& a, T& b, T& c, T& d, T& e)
     {                           // c < d < {be}
         if (e < b)
         {                       // c < d < e < b
-            T temp = b;
+            Type temp = b;
             b = c;
             c = d;
             d = e;
@@ -215,7 +210,7 @@ sort5(T& a, T& b, T& c, T& d, T& e)
         }
         else
         {                       // c < d < b < e
-            T temp = b;
+            Type temp = b;
             b = c;
             c = d;
             d = temp;
@@ -228,16 +223,15 @@ sort5(T& a, T& b, T& c, T& d, T& e)
     //Assert1 (!(b < a) && !(c < b) & !(d < c) & !(e < d));
 }
 
-
-template <class T>
+template <class Type>
 inline void
-insertion_sort(T* a, T* aEnd)
+insertion_sort(Type* a, Type* aEnd)
 {
-    T* pp;
-    for (T* p = a + 1; p < aEnd; p++)
+    Type* pp;
+    for (Type* p = a + 1; p < aEnd; p++)
     {
         // Invariant a..p-1 is sorted;
-        T t = *p;
+        Type t = *p;
         if (t < *a)
         {   // new minimum
             // move stuff to the right
@@ -262,14 +256,14 @@ insertion_sort(T* a, T* aEnd)
 // sort each bucket
 // bucket[i] is an index one off to the right from
 // the end of the i-th bucket
-template <class T>
+template <class Type>
 static void
-cleanup(T* b, int_type* bucket, int_type K)
+cleanup(Type* b, int_type* bucket, int_type K)
 {
-    T* c = b;
+    Type* c = b;
     for (int_type i = 0; i < K; i++)
     {
-        T* cEnd = b + bucket[i];
+        Type* cEnd = b + bucket[i];
         switch (cEnd - c)
         {
         case 0:
@@ -318,11 +312,12 @@ cleanup(T* b, int_type* bucket, int_type K)
 // and using (key(x) - offset) >> shift to index buckets.
 // the input comes from a..aEnd-1
 // the output goes to b
-template <typename type_key>
+template <typename TypeKey>
 void
-l1sort(type_key* a,
-       type_key* aEnd,
-       type_key* b, int_type* bucket, int_type K, typename type_key::key_type offset, int shift)
+l1sort(TypeKey* a,
+       TypeKey* aEnd,
+       TypeKey* b, int_type* bucket, int_type K,
+       typename TypeKey::key_type offset, int shift)
 {
     count(a, aEnd, bucket, K, offset, shift);
     exclusive_prefix_sum(bucket, K);
@@ -330,29 +325,29 @@ l1sort(type_key* a,
     cleanup(b, bucket, K);
 }
 
-template <typename type, typename type_key, typename key_extractor>
-void classify_block(type* begin, type* end, type_key*& out,
-                    int_type* bucket, typename key_extractor::key_type offset, unsigned shift, key_extractor keyobj)
+template <typename Type, typename TypeKey, typename KeyExtractor>
+void classify_block(Type* begin, Type* end, TypeKey*& out,
+                    int_type* bucket, typename KeyExtractor::key_type offset, unsigned shift, KeyExtractor keyobj)
 {
-    assert(shift < (sizeof(typename key_extractor::key_type) * 8 + 1));
-    for (type* p = begin; p < end; p++, out++)  // count & create references
+    assert(shift < (sizeof(typename KeyExtractor::key_type) * 8 + 1));
+    for (Type* p = begin; p < end; p++, out++)  // count & create references
     {
         out->ptr = p;
-        typename key_extractor::key_type key = keyobj(*p);
-        int_type ibucket = (key - offset) >> shift;
+        typename KeyExtractor::key_type key = keyobj(*p);
+        int_type ibucket = (int_type)((key - offset) >> shift);
         out->key = key;
         bucket[ibucket]++;
     }
 }
-template <typename type, typename type_key, typename key_extractor>
-void classify_block(type* begin, type* end, type_key*& out, int_type* bucket, typename type::key_type offset, unsigned shift,
-                    const int_type K, key_extractor keyobj)
+template <typename Type, typename TypeKey, typename KeyExtractor>
+void classify_block(Type* begin, Type* end, TypeKey*& out, int_type* bucket, typename Type::key_type offset, unsigned shift,
+                    const int_type K, KeyExtractor keyobj)
 {
-    assert(shift < (sizeof(typename type::key_type) * 8 + 1));
-    for (type* p = begin; p < end; p++, out++)  // count & create references
+    assert(shift < (sizeof(typename Type::key_type) * 8 + 1));
+    for (Type* p = begin; p < end; p++, out++)  // count & create references
     {
         out->ptr = p;
-        typename type::key_type key = keyobj(*p);
+        typename Type::key_type key = keyobj(*p);
         int_type ibucket = (key - offset) >> shift;
         /*
         if (!(ibucket < K && ibucket >= 0))
diff --git a/include/stxxl/bits/algo/ksort.h b/include/stxxl/bits/algo/ksort.h
index 208525e..33ec8cf 100644
--- a/include/stxxl/bits/algo/ksort.h
+++ b/include/stxxl/bits/algo/ksort.h
@@ -33,15 +33,13 @@
 #include <stxxl/bits/common/is_sorted.h>
 #include <stxxl/bits/common/utils.h>
 
-
 //#define INTERLEAVED_ALLOC
 
 #define OPT_MERGING
 
 STXXL_BEGIN_NAMESPACE
 
-//! \defgroup stllayer STL-User Layer
-//! Layer which groups STL compatible algorithms and containers
+//! \addtogroup stllayer
 
 //! \defgroup stlalgo Algorithms
 //! \ingroup stllayer
@@ -67,7 +65,6 @@ struct trigger_entry
     }
 };
 
-
 template <typename BIDType, typename KeyType>
 inline bool operator < (const trigger_entry<BIDType, KeyType>& a,
                         const trigger_entry<BIDType, KeyType>& b)
@@ -82,36 +79,35 @@ inline bool operator > (const trigger_entry<BIDType, KeyType>& a,
     return (a.key > b.key);
 }
 
-template <typename type, typename key_type1>
+template <typename Type, typename KeyType>
 struct type_key
 {
-    typedef key_type1 key_type;
+    typedef KeyType key_type;
     key_type key;
-    type* ptr;
+    Type* ptr;
 
     type_key() { }
-    type_key(key_type k, type* p) : key(k), ptr(p)
+    type_key(key_type k, Type* p) : key(k), ptr(p)
     { }
 };
 
-template <typename type, typename key1>
-bool operator < (const type_key<type, key1>& a, const type_key<type, key1>& b)
+template <typename Type, typename KeyType>
+bool operator < (const type_key<Type, KeyType>& a, const type_key<Type, KeyType>& b)
 {
     return a.key < b.key;
 }
 
-template <typename type, typename key1>
-bool operator > (const type_key<type, key1>& a, const type_key<type, key1>& b)
+template <typename Type, typename KeyType>
+bool operator > (const type_key<Type, KeyType>& a, const type_key<Type, KeyType>& b)
 {
     return a.key > b.key;
 }
 
-
-template <typename block_type, typename bid_type>
+template <typename BlockType, typename BidType>
 struct write_completion_handler
 {
-    block_type* block;
-    bid_type bid;
+    BlockType* block;
+    BidType bid;
     request_ptr* req;
     void operator () (request* /*completed_req*/)
     {
@@ -119,34 +115,34 @@ struct write_completion_handler
     }
 };
 
-template <typename type_key_,
-          typename block_type,
-          typename run_type,
-          typename input_bid_iterator,
-          typename key_extractor>
+template <typename TypeKey,
+          typename BlockType,
+          typename RunType,
+          typename InputBidIterator,
+          typename KeyExtractor>
 inline void write_out(
-    type_key_* begin,
-    type_key_* end,
-    block_type*& cur_blk,
-    const block_type* end_blk,
+    TypeKey* begin,
+    TypeKey* end,
+    BlockType*& cur_blk,
+    const BlockType* end_blk,
     int_type& out_block,
     int_type& out_pos,
-    run_type& run,
-    write_completion_handler<block_type, typename block_type::bid_type>*& next_read,
-    typename block_type::bid_type*& bids,
+    RunType& run,
+    write_completion_handler<BlockType, typename BlockType::bid_type>*& next_read,
+    typename BlockType::bid_type*& bids,
     request_ptr* write_reqs,
     request_ptr* read_reqs,
-    input_bid_iterator& it,
-    key_extractor keyobj)
+    InputBidIterator& it,
+    KeyExtractor keyobj)
 {
-    typedef typename block_type::type type;
+    typedef typename BlockType::type type;
 
     type* elem = cur_blk->elem;
-    for (type_key_* p = begin; p < end; p++)
+    for (TypeKey* p = begin; p < end; p++)
     {
         elem[out_pos++] = *(p->ptr);
 
-        if (out_pos >= block_type::size)
+        if (out_pos >= BlockType::size)
         {
             run[out_block].key = keyobj(*(cur_blk->elem));
 
@@ -177,40 +173,40 @@ inline void write_out(
 }
 
 template <
-    typename block_type,
-    typename run_type,
-    typename input_bid_iterator,
-    typename key_extractor>
+    typename BlockType,
+    typename RunType,
+    typename InputBidIterator,
+    typename KeyExtractor>
 void
 create_runs(
-    input_bid_iterator it,
-    run_type** runs,
+    InputBidIterator it,
+    RunType** runs,
     const unsigned_type nruns,
     const unsigned_type m2,
-    key_extractor keyobj)
+    KeyExtractor keyobj)
 {
-    typedef typename block_type::value_type type;
-    typedef typename block_type::bid_type bid_type;
-    typedef typename key_extractor::key_type key_type;
+    typedef typename BlockType::value_type type;
+    typedef typename BlockType::bid_type bid_type;
+    typedef typename KeyExtractor::key_type key_type;
     typedef type_key<type, key_type> type_key_;
 
     block_manager* bm = block_manager::get_instance();
-    block_type* Blocks1 = new block_type[m2];
-    block_type* Blocks2 = new block_type[m2];
+    BlockType* Blocks1 = new BlockType[m2];
+    BlockType* Blocks2 = new BlockType[m2];
     bid_type* bids = new bid_type[m2];
     type_key_* refs1 = new type_key_[m2 * Blocks1->size];
     type_key_* refs2 = new type_key_[m2 * Blocks1->size];
     request_ptr* read_reqs = new request_ptr[m2];
     request_ptr* write_reqs = new request_ptr[m2];
-    write_completion_handler<block_type, bid_type>* next_run_reads =
-        new write_completion_handler<block_type, bid_type>[m2];
+    write_completion_handler<BlockType, bid_type>* next_run_reads =
+        new write_completion_handler<BlockType, bid_type>[m2];
 
-    run_type* run;
+    RunType* run;
     run = *runs;
     int_type run_size = (*runs)->size();
     key_type offset = 0;
-    const int log_k1 = ilog2_ceil((m2 * block_type::size * sizeof(type_key_) / STXXL_L2_SIZE) ?
-                                  (m2 * block_type::size * sizeof(type_key_) / STXXL_L2_SIZE) : 2);
+    const int log_k1 = ilog2_ceil((m2 * BlockType::size * sizeof(type_key_) / STXXL_L2_SIZE) ?
+                                  (m2 * BlockType::size * sizeof(type_key_) / STXXL_L2_SIZE) : 2);
     const int log_k2 = ilog2_floor(m2 * Blocks1->size) - log_k1 - 1;
     STXXL_VERBOSE("log_k1: " << log_k1 << " log_k2:" << log_k2);
     const int_type k1 = int_type(1) << log_k1;
@@ -262,9 +258,9 @@ create_runs(
         // recurse on each bucket
         type_key_* c = refs2;
         type_key_* d = refs1;
-        block_type* cur_blk = Blocks2;
-        block_type* end_blk = Blocks2 + next_run_size;
-        write_completion_handler<block_type, bid_type>* next_read = next_run_reads;
+        BlockType* cur_blk = Blocks2;
+        BlockType* end_blk = Blocks2 + next_run_size;
+        write_completion_handler<BlockType, bid_type>* next_read = next_run_reads;
 
         for (i = 0; i < k1; i++)
         {
@@ -300,17 +296,20 @@ create_runs(
     delete[] write_reqs;
 }
 
-template <typename block_type,
+template <typename BlockType,
           typename prefetcher_type,
-          typename key_extractor>
-struct run_cursor2_cmp : public std::binary_function<run_cursor2<block_type, prefetcher_type>, run_cursor2<block_type, prefetcher_type>, bool>
+          typename KeyExtractor>
+struct run_cursor2_cmp : public std::binary_function<
+                             run_cursor2<BlockType, prefetcher_type>,
+                             run_cursor2<BlockType, prefetcher_type>,
+                             bool
+                             >
 {
-    typedef run_cursor2<block_type, prefetcher_type> cursor_type;
-    key_extractor keyobj;
-    run_cursor2_cmp(key_extractor keyobj_)
-    {
-        keyobj = keyobj_;
-    }
+    typedef run_cursor2<BlockType, prefetcher_type> cursor_type;
+    KeyExtractor keyobj;
+    run_cursor2_cmp(KeyExtractor _keyobj)
+        : keyobj(_keyobj)
+    { }
     inline bool operator () (const cursor_type& a, const cursor_type& b) const
     {
         if (UNLIKELY(b.empty()))
@@ -327,29 +326,28 @@ private:
     run_cursor2_cmp() { }
 };
 
-
-template <typename record_type, typename key_extractor>
-class key_comparison : public std::binary_function<record_type, record_type, bool>
+template <typename RecordType, typename KeyExtractor>
+class key_comparison : public std::binary_function<RecordType, RecordType, bool>
 {
-    key_extractor ke;
+    KeyExtractor ke;
 
 public:
     key_comparison() { }
-    key_comparison(key_extractor ke_) : ke(ke_) { }
-    bool operator () (const record_type& a, const record_type& b) const
+    key_comparison(KeyExtractor ke_) : ke(ke_) { }
+    bool operator () (const RecordType& a, const RecordType& b) const
     {
         return ke(a) < ke(b);
     }
 };
 
-
-template <typename block_type, typename run_type, typename key_ext_>
-bool check_ksorted_runs(run_type** runs,
+template <typename BlockType, typename RunType, typename KeyExtractor>
+bool check_ksorted_runs(RunType** runs,
                         unsigned_type nruns,
                         unsigned_type m,
-                        key_ext_ keyext)
+                        KeyExtractor keyext)
 {
-    typedef typename block_type::value_type value_type;
+    typedef BlockType block_type;
+    typedef typename BlockType::value_type value_type;
 
     STXXL_MSG("check_ksorted_runs  Runs: " << nruns);
     unsigned_type irun = 0;
@@ -411,7 +409,7 @@ bool check_ksorted_runs(run_type** runs,
             }
             if (!stxxl::is_sorted(make_element_iterator(blocks, 0),
                                   make_element_iterator(blocks, nelements),
-                                  key_comparison<value_type, key_ext_>()))
+                                  key_comparison<value_type, KeyExtractor>()))
             {
                 STXXL_MSG("check_sorted_runs  wrong order in the run " << irun);
                 STXXL_MSG("Data in blocks:");
@@ -441,19 +439,19 @@ bool check_ksorted_runs(run_type** runs,
     return true;
 }
 
-
-template <typename block_type, typename run_type, typename key_extractor>
-void merge_runs(run_type** in_runs, unsigned_type nruns, run_type* out_run, unsigned_type _m, key_extractor keyobj)
+template <typename BlockType, typename RunType, typename KeyExtractor>
+void merge_runs(RunType** in_runs, unsigned_type nruns, RunType* out_run, unsigned_type _m, KeyExtractor keyobj)
 {
-    typedef block_prefetcher<block_type, typename run_type::iterator> prefetcher_type;
-    typedef run_cursor2<block_type, prefetcher_type> run_cursor_type;
+    typedef BlockType block_type;
+    typedef block_prefetcher<BlockType, typename RunType::iterator> prefetcher_type;
+    typedef run_cursor2<BlockType, prefetcher_type> run_cursor_type;
 
     unsigned_type i;
-    run_type consume_seq(out_run->size());
+    RunType consume_seq(out_run->size());
 
     int_type* prefetch_seq = new int_type[out_run->size()];
 
-    typename run_type::iterator copy_start = consume_seq.begin();
+    typename RunType::iterator copy_start = consume_seq.begin();
     for (i = 0; i < nruns; i++)
     {
         // TODO: try to avoid copy
@@ -483,14 +481,13 @@ void merge_runs(run_type** in_runs, unsigned_type nruns, run_type* out_run, unsi
         consume_seq,
         prefetch_seq,
         n_opt_prefetch_buffers,
-        disks_number);
+        config::get_instance()->get_max_device_id());
 #else
     for (i = 0; i < out_run->size(); i++)
         prefetch_seq[i] = i;
 
 #endif
 
-
     prefetcher_type prefetcher(consume_seq.begin(),
                                consume_seq.end(),
                                prefetch_seq,
@@ -500,10 +497,10 @@ void merge_runs(run_type** in_runs, unsigned_type nruns, run_type* out_run, unsi
 
     unsigned_type out_run_size = out_run->size();
 
-    run_cursor2_cmp<block_type, prefetcher_type, key_extractor> cmp(keyobj);
+    run_cursor2_cmp<block_type, prefetcher_type, KeyExtractor> cmp(keyobj);
     loser_tree<
         run_cursor_type,
-        run_cursor2_cmp<block_type, prefetcher_type, key_extractor> >
+        run_cursor2_cmp<block_type, prefetcher_type, KeyExtractor> >
     losers(&prefetcher, nruns, cmp);
 
     block_type* out_buffer = writer.get_free_block();
@@ -528,27 +525,29 @@ void merge_runs(run_type** in_runs, unsigned_type nruns, run_type* out_run, unsi
     }
 }
 
-
-template <typename block_type,
-          typename alloc_strategy,
-          typename input_bid_iterator,
-          typename key_extractor>
-
-simple_vector<trigger_entry<typename block_type::bid_type, typename key_extractor::key_type> >*
-ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m, key_extractor keyobj)
+template <typename BlockType,
+          typename AllocStrategy,
+          typename InputBidIterator,
+          typename KeyExtractor>
+simple_vector<
+    trigger_entry<typename BlockType::bid_type, typename KeyExtractor::key_type>
+    >*
+ksort_blocks(InputBidIterator input_bids, unsigned_type _n,
+             unsigned_type _m, KeyExtractor keyobj)
 {
-    typedef typename block_type::value_type type;
-    typedef typename key_extractor::key_type key_type;
-    typedef typename block_type::bid_type bid_type;
-    typedef trigger_entry<bid_type, typename key_extractor::key_type> trigger_entry_type;
+    typedef BlockType block_type;
+    typedef typename BlockType::value_type type;
+    typedef typename KeyExtractor::key_type key_type;
+    typedef typename BlockType::bid_type bid_type;
+    typedef trigger_entry<bid_type, typename KeyExtractor::key_type> trigger_entry_type;
     typedef simple_vector<trigger_entry_type> run_type;
-    typedef typename interleaved_alloc_traits<alloc_strategy>::strategy interleaved_alloc_strategy;
+    typedef typename interleaved_alloc_traits<AllocStrategy>::strategy interleaved_alloc_strategy;
 
     unsigned_type m2 = div_ceil(_m, 2);
     const unsigned_type m2_rf = m2 * block_type::raw_size /
                                 (block_type::raw_size + block_type::size * sizeof(type_key<type, key_type>));
     STXXL_VERBOSE("Reducing number of blocks in a run from " << m2 << " to " <<
-                  m2_rf << " due to key size: " << sizeof(typename key_extractor::key_type) << " bytes");
+                  m2_rf << " due to key size: " << sizeof(typename KeyExtractor::key_type) << " bytes");
     m2 = m2_rf;
     unsigned_type full_runs = _n / m2;
     unsigned_type partial_runs = ((_n % m2) ? 1 : 0);
@@ -566,21 +565,20 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
     for (i = 0; i < full_runs; i++)
         runs[i] = new run_type(m2);
 
-
 #ifdef INTERLEAVED_ALLOC
     if (partial_runs)
     {
         unsigned_type last_run_size = _n - full_runs * m2;
         runs[i] = new run_type(last_run_size);
 
-        mng->new_blocks(interleaved_alloc_strategy(nruns, alloc_strategy()),
+        mng->new_blocks(interleaved_alloc_strategy(nruns, AllocStrategy()),
                         runs2bid_array_adaptor2<block_type::raw_size, run_type>
                             (runs, 0, nruns, last_run_size),
                         runs2bid_array_adaptor2<block_type::raw_size, run_type>
                             (runs, _n, nruns, last_run_size));
     }
     else
-        mng->new_blocks(interleaved_alloc_strategy(nruns, alloc_strategy()),
+        mng->new_blocks(interleaved_alloc_strategy(nruns, AllocStrategy()),
                         runs2bid_array_adaptor<block_type::raw_size, run_type>
                             (runs, 0, nruns),
                         runs2bid_array_adaptor<block_type::raw_size, run_type>
@@ -592,14 +590,12 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
 
     for (i = 0; i < nruns; i++)
     {
-        mng->new_blocks(alloc_strategy(), make_bid_iterator(runs[i]->begin()), make_bid_iterator(runs[i]->end()));
+        mng->new_blocks(AllocStrategy(), make_bid_iterator(runs[i]->begin()), make_bid_iterator(runs[i]->end()));
     }
 #endif
 
-    create_runs<block_type,
-                run_type,
-                input_bid_iterator,
-                key_extractor>(input_bids, runs, nruns, m2, keyobj);
+    create_runs<block_type, run_type, InputBidIterator, KeyExtractor>(
+        input_bids, runs, nruns, m2, keyobj);
 
     after_runs_creation = timestamp();
 
@@ -637,7 +633,7 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
         if (cur_out_run == 1 && blocks_in_new_run == int_type(_n) && !input_bids->is_managed())
         {
             // if we sort a file we can reuse the input bids for the output
-            input_bid_iterator cur = input_bids;
+            InputBidIterator cur = input_bids;
             for (int_type i = 0; cur != (input_bids + _n); ++cur)
             {
                 (*new_runs[0])[i++].bid = *cur;
@@ -660,12 +656,11 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
         }
         else
         {
-            mng->new_blocks(interleaved_alloc_strategy(new_nruns, alloc_strategy()),
+            mng->new_blocks(interleaved_alloc_strategy(new_nruns, AllocStrategy()),
                             runs2bid_array_adaptor2<block_type::raw_size, run_type>(new_runs, 0, new_nruns, blocks_in_new_run),
                             runs2bid_array_adaptor2<block_type::raw_size, run_type>(new_runs, _n, new_nruns, blocks_in_new_run));
         }
 
-
         // merge all
         runs_left = nruns;
         cur_out_run = 0;
@@ -673,11 +668,11 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
         {
             int_type runs2merge = STXXL_MIN(runs_left, merge_factor);
 #if STXXL_CHECK_ORDER_IN_SORTS
-            assert((check_ksorted_runs<block_type, run_type, key_extractor>(runs + nruns - runs_left, runs2merge, m2, keyobj)));
+            assert((check_ksorted_runs<block_type, run_type, KeyExtractor>(runs + nruns - runs_left, runs2merge, m2, keyobj)));
 #endif
             STXXL_VERBOSE("Merging " << runs2merge << " runs");
-            merge_runs<block_type, run_type, key_extractor>(runs + nruns - runs_left,
-                                                            runs2merge, *(new_runs + (cur_out_run++)), _m, keyobj);
+            merge_runs<block_type, run_type, KeyExtractor>(runs + nruns - runs_left,
+                                                           runs2merge, *(new_runs + (cur_out_run++)), _m, keyobj);
             runs_left -= runs2merge;
         }
 
@@ -695,7 +690,6 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
                   after_runs_creation - begin << " s");
     STXXL_VERBOSE("Time in I/O wait(rf): " << io_wait_after_rf << " s");
     STXXL_VERBOSE(*stats::get_instance());
-    STXXL_UNUSED(begin + after_runs_creation + end + io_wait_after_rf);
 
     return result;
 }
@@ -703,7 +697,7 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
 } // namespace ksort_local
 
 /*!
- * \brief Sort records with integer keys, see \ref design_algo_ksort.
+ * Sort records with integer keys, see \ref design_algo_ksort.
  *
  * stxxl::ksort sorts the elements in [first, last) into ascending order,
  * meaning that if \c i and \c j are any two valid iterators in [first, last)
@@ -730,11 +724,16 @@ ksort_blocks(input_bid_iterator input_bids, unsigned_type _n, unsigned_type _m,
 template <typename ExtIterator, typename KeyExtractor>
 void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_type M)
 {
-    typedef simple_vector<ksort_local::trigger_entry<typename ExtIterator::bid_type,
-                                                     typename KeyExtractor::key_type> > run_type;
+    typedef simple_vector<
+            ksort_local::trigger_entry<
+                typename ExtIterator::bid_type, typename KeyExtractor::key_type
+                >
+            > run_type;
     typedef typename ExtIterator::vector_type::value_type value_type;
+    typedef typename ExtIterator::bid_type bid_type;
     typedef typename ExtIterator::block_type block_type;
-
+    typedef typename ExtIterator::vector_type::alloc_strategy_type alloc_strategy_type;
+    typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
 
     unsigned_type n = 0;
     block_manager* mng = block_manager::get_instance();
@@ -743,7 +742,8 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
     if ((last - first) * sizeof(value_type) < M)
     {
-        stl_in_memory_sort(first, last, ksort_local::key_comparison<value_type, KeyExtractor>(keyobj));
+        stl_in_memory_sort(first, last,
+                           ksort_local::key_comparison<value_type, KeyExtractor>(keyobj));
     }
     else
     {
@@ -754,9 +754,9 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
             if (last.block_offset())            // first and last element reside
             // not in the beginning of the block
             {
-                typename ExtIterator::block_type * first_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * last_block = new typename ExtIterator::block_type;
-                typename ExtIterator::bid_type first_bid, last_bid;
+                block_type* first_block = new block_type;
+                block_type* last_block = new block_type;
+                bid_type first_bid, last_bid;
                 request_ptr req;
 
                 req = first_block->read(*first.bid());
@@ -764,7 +764,6 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
                 mng->new_block(FR(), last_bid);
                 req->wait();
 
-
                 req = last_block->read(*last.bid());
 
                 unsigned_type i = 0;
@@ -797,17 +796,14 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
                 run_type* out =
                     ksort_local::ksort_blocks<
-                        typename ExtIterator::block_type,
-                        typename ExtIterator::vector_type::alloc_strategy_type,
-                        typename ExtIterator::bids_container_iterator,
-                        KeyExtractor>
-                        (first.bid(), n, M / block_type::raw_size, keyobj);
-
-
-                first_block = new typename ExtIterator::block_type;
-                last_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * sorted_first_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * sorted_last_block = new typename ExtIterator::block_type;
+                        block_type, alloc_strategy_type,
+                        bids_container_iterator, KeyExtractor
+                        >(first.bid(), n, M / block_type::raw_size, keyobj);
+
+                first_block = new block_type;
+                last_block = new block_type;
+                block_type* sorted_first_block = new block_type;
+                block_type* sorted_last_block = new block_type;
                 request_ptr* reqs = new request_ptr[2];
 
                 reqs[0] = first_block->read(first_bid);
@@ -832,7 +828,6 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
                 req->wait();
 
-
                 req = last_block->write(last_bid);
 
                 mng->delete_block(out->begin()->bid);
@@ -843,7 +838,7 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
                 typename run_type::iterator it = out->begin();
                 it++;
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
                 cur_bid++;
 
                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
@@ -866,15 +861,14 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
                 // first element resides
                 // not in the beginning of the block
 
-                typename ExtIterator::block_type * first_block = new typename ExtIterator::block_type;
-                typename ExtIterator::bid_type first_bid;
+                block_type* first_block = new block_type;
+                bid_type first_bid;
                 request_ptr req;
 
                 req = first_block->read(*first.bid());
                 mng->new_block(FR(), first_bid);                // try to overlap
                 req->wait();
 
-
                 unsigned_type i = 0;
                 for ( ; i < first.block_offset(); i++)
                 {
@@ -893,16 +887,13 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
                 run_type* out =
                     ksort_local::ksort_blocks<
-                        typename ExtIterator::block_type,
-                        typename ExtIterator::vector_type::alloc_strategy_type,
-                        typename ExtIterator::bids_container_iterator,
-                        KeyExtractor>
-                        (first.bid(), n, M / block_type::raw_size, keyobj);
-
+                        block_type, alloc_strategy_type,
+                        bids_container_iterator, KeyExtractor
+                        >(first.bid(), n, M / block_type::raw_size, keyobj);
 
-                first_block = new typename ExtIterator::block_type;
+                first_block = new block_type;
 
-                typename ExtIterator::block_type * sorted_first_block = new typename ExtIterator::block_type;
+                block_type* sorted_first_block = new block_type;
 
                 request_ptr* reqs = new request_ptr[2];
 
@@ -923,7 +914,7 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
                 typename run_type::iterator it = out->begin();
                 it++;
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
                 cur_bid++;
 
                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
@@ -947,8 +938,8 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
             if (last.block_offset())            // last element resides
             // not in the beginning of the block
             {
-                typename ExtIterator::block_type * last_block = new typename ExtIterator::block_type;
-                typename ExtIterator::bid_type last_bid;
+                block_type* last_block = new block_type;
+                bid_type last_bid;
                 request_ptr req;
                 unsigned_type i;
 
@@ -973,15 +964,12 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
                 run_type* out =
                     ksort_local::ksort_blocks<
-                        typename ExtIterator::block_type,
-                        typename ExtIterator::vector_type::alloc_strategy_type,
-                        typename ExtIterator::bids_container_iterator,
-                        KeyExtractor>
-                        (first.bid(), n, M / block_type::raw_size, keyobj);
-
+                        block_type, alloc_strategy_type,
+                        bids_container_iterator, KeyExtractor
+                        >(first.bid(), n, M / block_type::raw_size, keyobj);
 
-                last_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * sorted_last_block = new typename ExtIterator::block_type;
+                last_block = new block_type;
+                block_type* sorted_last_block = new block_type;
                 request_ptr* reqs = new request_ptr[2];
 
                 reqs[0] = last_block->read(last_bid);
@@ -1000,7 +988,7 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
                 *last.bid() = last_bid;
 
                 typename run_type::iterator it = out->begin();
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
 
                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
                 {
@@ -1022,14 +1010,12 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 
                 run_type* out =
                     ksort_local::ksort_blocks<
-                        typename ExtIterator::block_type,
-                        typename ExtIterator::vector_type::alloc_strategy_type,
-                        typename ExtIterator::bids_container_iterator,
-                        KeyExtractor>
-                        (first.bid(), n, M / block_type::raw_size, keyobj);
+                        block_type, alloc_strategy_type,
+                        bids_container_iterator, KeyExtractor
+                        >(first.bid(), n, M / block_type::raw_size, keyobj);
 
                 typename run_type::iterator it = out->begin();
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
 
                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
                 {
@@ -1048,26 +1034,26 @@ void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_ty
 #endif
 }
 
-template <typename record_type>
+template <typename RecordType>
 struct ksort_defaultkey
 {
-    typedef typename record_type::key_type key_type;
-    key_type operator () (const record_type& obj) const
+    typedef typename RecordType::key_type key_type;
+    key_type operator () (const RecordType& obj) const
     {
         return obj.key();
     }
-    record_type max_value() const
+    RecordType max_value() const
     {
-        return record_type::max_value();
+        return RecordType::max_value();
     }
-    record_type min_value() const
+    RecordType min_value() const
     {
-        return record_type::min_value();
+        return RecordType::min_value();
     }
 };
 
 /*!
- * \brief Sort records with integer keys, see \ref design_algo_ksort.
+ * Sort records with integer keys, see \ref design_algo_ksort.
  *
  * stxxl::ksort sorts the elements in [first, last) into ascending order,
  * meaning that if \c i and \c j are any two valid iterators in [first, last)
diff --git a/include/stxxl/bits/algo/losertree.h b/include/stxxl/bits/algo/losertree.h
index 544ad81..5820ccc 100644
--- a/include/stxxl/bits/algo/losertree.h
+++ b/include/stxxl/bits/algo/losertree.h
@@ -20,18 +20,17 @@
 #include <stxxl/bits/common/utils.h>
 #include <stxxl/bits/verbose.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <typename run_cursor_type,
-          typename run_cursor_cmp_type>
+template <typename RunCursorType,
+          typename RunCursorCmpType>
 class loser_tree : private noncopyable
 {
     int logK;
     int_type k;
     int_type* entry;
-    run_cursor_type* current;
-    run_cursor_cmp_type cmp;
+    RunCursorType* current;
+    RunCursorCmpType cmp;
 
     int_type init_winner(int_type root)
     {
@@ -57,13 +56,14 @@ class loser_tree : private noncopyable
     }
 
 public:
-    typedef typename run_cursor_type::prefetcher_type prefetcher_type;
-    typedef typename run_cursor_type::value_type value_type;
+    typedef typename RunCursorType::prefetcher_type prefetcher_type;
+    typedef typename RunCursorType::value_type value_type;
 
     loser_tree(
         prefetcher_type* p,
         int_type nruns,
-        run_cursor_cmp_type c) : cmp(c)
+        RunCursorCmpType c)
+        : cmp(c)
     {
         int_type i;
         logK = ilog2_ceil(nruns);
@@ -72,10 +72,10 @@ public:
         STXXL_VERBOSE2("loser_tree: logK=" << logK << " nruns=" << nruns << " K=" << kReg);
 
 #ifdef STXXL_SORT_SINGLE_PREFETCHER
-        current = new run_cursor_type[kReg];
-        run_cursor_type::set_prefetcher(p);
+        current = new RunCursorType[kReg];
+        RunCursorType::set_prefetcher(p);
 #else
-        current = new run_cursor_type[kReg];
+        current = new RunCursorType[kReg];
         for (i = 0; i < kReg; ++i)
             current[i].prefetcher() = p;
 #endif
@@ -115,7 +115,7 @@ private:
     template <int LogK>
     void multi_merge_unrolled(value_type* out_first, value_type* out_last)
     {
-        run_cursor_type* currentE, * winnerE;
+        RunCursorType* currentE, * winnerE;
         int_type* regEntry = entry;
         int_type winnerIndex = regEntry[0];
 
@@ -169,7 +169,7 @@ private:
 
     void multi_merge_k(value_type* out_first, value_type* out_last)
     {
-        run_cursor_type* currentE, * winnerE;
+        RunCursorType* currentE, * winnerE;
         int_type kReg = k;
         int_type winnerIndex = entry[0];
 
@@ -245,10 +245,10 @@ STXXL_END_NAMESPACE
 
 namespace std {
 
-template <typename run_cursor_type,
-          typename run_cursor_cmp_type>
-void swap(stxxl::loser_tree<run_cursor_type, run_cursor_cmp_type>& a,
-          stxxl::loser_tree<run_cursor_type, run_cursor_cmp_type>& b)
+template <typename RunCursorType,
+          typename RunCursorCmpType>
+void swap(stxxl::loser_tree<RunCursorType, RunCursorCmpType>& a,
+          stxxl::loser_tree<RunCursorType, RunCursorCmpType>& b)
 {
     a.swap(b);
 }
diff --git a/include/stxxl/bits/algo/random_shuffle.h b/include/stxxl/bits/algo/random_shuffle.h
index 19ad177..a4a3970 100644
--- a/include/stxxl/bits/algo/random_shuffle.h
+++ b/include/stxxl/bits/algo/random_shuffle.h
@@ -19,19 +19,15 @@
 //        (free stacks buffers)
 // TODO: shuffle small input in internal memory
 
-
 #include <stxxl/bits/stream/stream.h>
 #include <stxxl/scan>
 #include <stxxl/stack>
 
-
 STXXL_BEGIN_NAMESPACE
 
-
 //! \addtogroup stlalgo
 //! \{
 
-
 //! External equivalent of std::random_shuffle
 //! \param first begin of the range to shuffle
 //! \param last end of the range to shuffle
@@ -39,50 +35,51 @@ STXXL_BEGIN_NAMESPACE
 //! \param M number of bytes for internal use
 //! \param AS parallel disk allocation strategy
 //!
-//! - BlockSize_ size of the block to use for external memory data structures
-//! - PageSize_ page size in blocks to use for external memory data structures
-template <typename ExtIterator_,
-          typename RandomNumberGenerator_,
-          unsigned BlockSize_,
-          unsigned PageSize_,
-          typename AllocStrategy_>
-void random_shuffle(ExtIterator_ first,
-                    ExtIterator_ last,
-                    RandomNumberGenerator_& rand,
+//! - BlockSize size of the block to use for external memory data structures
+//! - PageSize page size in blocks to use for external memory data structures
+template <typename ExtIterator,
+          typename RandomNumberGenerator,
+          unsigned BlockSize,
+          unsigned PageSize,
+          typename AllocStrategy>
+void random_shuffle(ExtIterator first,
+                    ExtIterator last,
+                    RandomNumberGenerator& rand,
                     unsigned_type M,
-                    AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY())
+                    AllocStrategy AS = STXXL_DEFAULT_ALLOC_STRATEGY())
 {
     STXXL_UNUSED(AS);  // FIXME: Why is this not being used?
-    typedef typename ExtIterator_::value_type value_type;
-    typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
-                                            stxxl::grow_shrink2, PageSize_,
-                                            BlockSize_, void, 0, AllocStrategy_>::result stack_type;
+    typedef typename ExtIterator::value_type value_type;
+    typedef typename STACK_GENERATOR<
+            value_type, external, grow_shrink2, PageSize,
+            BlockSize, void, 0, AllocStrategy
+            >::result stack_type;
     typedef typename stack_type::block_type block_type;
 
     STXXL_VERBOSE1("random_shuffle: Plain Version");
-    STXXL_STATIC_ASSERT(int(BlockSize_) < 0 && "This implementation was never tested. Please report to the stxxl developers if you have an ExtIterator_ that works with this implementation.");
+    STXXL_STATIC_ASSERT(int(BlockSize) < 0 && "This implementation was never tested. Please report to the stxxl developers if you have an ExtIterator that works with this implementation.");
 
-    stxxl::int64 n = last - first; // the number of input elements
+    int64 n = last - first; // the number of input elements
 
     // make sure we have at least 6 blocks + 1 page
-    if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
+    if (M < 6 * BlockSize + PageSize * BlockSize) {
         STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
-        M = 6 * BlockSize_ + PageSize_ * BlockSize_;
+        M = 6 * BlockSize + PageSize * BlockSize;
         STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
     }
 
-    int_type k = M / (3 * BlockSize_); // number of buckets
+    int_type k = M / (3 * BlockSize); // number of buckets
 
-
-    stxxl::int64 i, j, size = 0;
+    int64 i, j, size = 0;
 
     value_type* temp_array;
-    typedef typename stxxl::VECTOR_GENERATOR<value_type,
-                                             PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
+    typedef typename VECTOR_GENERATOR<
+            value_type, PageSize, 4, BlockSize, AllocStrategy
+            >::result temp_vector_type;
     temp_vector_type* temp_vector;
 
-    STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets");
-    stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k);  // no read buffers and M/B-k write buffers
+    STXXL_VERBOSE1("random_shuffle: " << M / BlockSize - k << " write buffers for " << k << " buckets");
+    read_write_pool<block_type> pool(0, M / BlockSize - k);  // no read buffers and M/B-k write buffers
 
     stack_type** buckets;
 
@@ -91,10 +88,9 @@ void random_shuffle(ExtIterator_ first,
     for (j = 0; j < k; j++)
         buckets[j] = new stack_type(pool, 0);
 
-
     ///// Reading input /////////////////////
-    typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
-    input_stream in = stxxl::stream::streamify(first, last);
+    typedef typename stream::streamify_traits<ExtIterator>::stream_type input_stream;
+    input_stream in = stream::streamify(first, last);
 
     // distribute input into random buckets
     int_type random_bucket = 0;
@@ -107,12 +103,12 @@ void random_shuffle(ExtIterator_ first,
     ///// Processing //////////////////////
     // resize buffers
     pool.resize_write(0);
-    pool.resize_prefetch(PageSize_);
+    pool.resize_prefetch(PageSize);
 
-    unsigned_type space_left = M - k * BlockSize_ -
-                               PageSize_ * BlockSize_; // remaining int space
-    ExtIterator_ Writer = first;
-    ExtIterator_ it = first;
+    unsigned_type space_left = M - k * BlockSize -
+                               PageSize * BlockSize; // remaining int space
+    ExtIterator Writer = first;
+    ExtIterator it = first;
 
     for (i = 0; i < k; i++) {
         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
@@ -120,7 +116,7 @@ void random_shuffle(ExtIterator_ first,
 
     // shuffle each bucket
     for (i = 0; i < k; i++) {
-        buckets[i]->set_prefetch_aggr(PageSize_);
+        buckets[i]->set_prefetch_aggr(PageSize);
         size = buckets[i]->size();
 
         // does the bucket fit into memory?
@@ -159,14 +155,14 @@ void random_shuffle(ExtIterator_ first,
             }
 
             pool.resize_prefetch(0);
-            space_left += PageSize_ * BlockSize_;
+            space_left += PageSize * BlockSize;
             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
 
             // recursive shuffle
             stxxl::random_shuffle(temp_vector->begin(),
                                   temp_vector->end(), rand, space_left);
 
-            pool.resize_prefetch(PageSize_);
+            pool.resize_prefetch(PageSize);
 
             // write back
             for (j = 0; j < size; j++) {
@@ -180,7 +176,7 @@ void random_shuffle(ExtIterator_ first,
 
         // free bucket
         delete buckets[i];
-        space_left += BlockSize_;
+        space_left += BlockSize;
     }
 
     delete[] buckets;
@@ -191,39 +187,45 @@ void random_shuffle(ExtIterator_ first,
 //! \param last end of the range to shuffle
 //! \param rand random number generator object (functor)
 //! \param M number of bytes for internal use
-template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
-          unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
-void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
-                    stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last,
-                    RandomNumberGenerator_& rand,
-                    unsigned_type M)
+template <typename Type, typename AllocStrategy, typename SizeType, typename DiffType,
+          unsigned BlockSize, typename PageType, unsigned PageSize, typename RandomNumberGenerator>
+void random_shuffle(
+    stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
+                           BlockSize, PageType, PageSize> first,
+    stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
+                           BlockSize, PageType, PageSize> last,
+    RandomNumberGenerator& rand,
+    unsigned_type M)
 {
-    typedef stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> ExtIterator_;
-    typedef typename ExtIterator_::value_type value_type;
+    typedef stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType, BlockSize, PageType, PageSize> ExtIterator;
+    typedef typename ExtIterator::value_type value_type;
+    typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
     typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
-                                            stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type;
+                                            stxxl::grow_shrink2, PageSize, BlockSize>::result stack_type;
     typedef typename stack_type::block_type block_type;
 
     STXXL_VERBOSE1("random_shuffle: Vector Version");
 
     // make sure we have at least 6 blocks + 1 page
-    if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
+    if (M < 6 * BlockSize + PageSize * BlockSize) {
         STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
-        M = 6 * BlockSize_ + PageSize_ * BlockSize_;
+        M = 6 * BlockSize + PageSize * BlockSize;
         STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
     }
 
-    stxxl::int64 n = last - first;     // the number of input elements
-    int_type k = M / (3 * BlockSize_); // number of buckets
+    stxxl::int64 n = last - first;    // the number of input elements
+    int_type k = M / (3 * BlockSize); // number of buckets
 
     stxxl::int64 i, j, size = 0;
 
     value_type* temp_array;
-    typedef typename stxxl::VECTOR_GENERATOR<value_type,
-                                             PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
+    typedef typename stxxl::VECTOR_GENERATOR<
+            value_type, PageSize, 4, BlockSize, AllocStrategy
+            >::result temp_vector_type;
     temp_vector_type* temp_vector;
 
-    stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k);  // no read buffers and M/B-k write buffers
+    // no read buffers and M/B-k write buffers
+    stxxl::read_write_pool<block_type> pool(0, M / BlockSize - k);
 
     stack_type** buckets;
 
@@ -232,9 +234,8 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
     for (j = 0; j < k; j++)
         buckets[j] = new stack_type(pool, 0);
 
-
-    typedef buf_istream<block_type, typename ExtIterator_::bids_container_iterator> buf_istream_type;
-    typedef buf_ostream<block_type, typename ExtIterator_::bids_container_iterator> buf_ostream_type;
+    typedef buf_istream<block_type, bids_container_iterator> buf_istream_type;
+    typedef buf_ostream<block_type, bids_container_iterator> buf_ostream_type;
 
     first.flush();     // flush container
 
@@ -243,12 +244,12 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
     // create buffered write stream for blocks
     buf_ostream_type out(first.bid(), 2);
 
-    ExtIterator_ _cur = first - first.block_offset();
+    ExtIterator _cur = first - first.block_offset();
 
     // leave part of the block before _begin untouched (e.g. copy)
     for ( ; _cur != first; ++_cur)
     {
-        typename ExtIterator_::value_type tmp;
+        typename ExtIterator::value_type tmp;
         in >> tmp;
         out << tmp;
     }
@@ -259,7 +260,7 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
     int_type random_bucket = 0;
     for (i = 0; i < n; ++i, ++_cur) {
         random_bucket = rand((unsigned)k);
-        typename ExtIterator_::value_type tmp;
+        typename ExtIterator::value_type tmp;
         in >> tmp;
         buckets[random_bucket]->push(tmp); // reading the current input element
     }
@@ -267,10 +268,10 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
     ///// Processing //////////////////////
     // resize buffers
     pool.resize_write(0);
-    pool.resize_prefetch(PageSize_);
+    pool.resize_prefetch(PageSize);
 
-    unsigned_type space_left = M - k * BlockSize_ -
-                               PageSize_ * BlockSize_; // remaining int space
+    // remaining int space
+    unsigned_type space_left = M - k * BlockSize - PageSize * BlockSize;
 
     for (i = 0; i < k; i++) {
         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
@@ -278,7 +279,7 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
 
     // shuffle each bucket
     for (i = 0; i < k; i++) {
-        buckets[i]->set_prefetch_aggr(PageSize_);
+        buckets[i]->set_prefetch_aggr(PageSize);
         size = buckets[i]->size();
 
         // does the bucket fit into memory?
@@ -286,7 +287,7 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
             STXXL_VERBOSE1("random_shuffle: no recursion");
 
             // copy bucket into temp. array
-            temp_array = new value_type[size];
+            temp_array = new value_type[(size_t)size];
             for (j = 0; j < size; j++) {
                 temp_array[j] = buckets[i]->top();
                 buckets[i]->pop();
@@ -298,7 +299,7 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
 
             // write back
             for (j = 0; j < size; j++) {
-                typename ExtIterator_::value_type tmp;
+                typename ExtIterator::value_type tmp;
                 tmp = temp_array[j];
                 out << tmp;
             }
@@ -316,7 +317,7 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
             }
 
             pool.resize_prefetch(0);
-            space_left += PageSize_ * BlockSize_;
+            space_left += PageSize * BlockSize;
 
             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
 
@@ -324,11 +325,11 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
             stxxl::random_shuffle(temp_vector->begin(),
                                   temp_vector->end(), rand, space_left);
 
-            pool.resize_prefetch(PageSize_);
+            pool.resize_prefetch(PageSize);
 
             // write back
             for (j = 0; j < size; j++) {
-                typename ExtIterator_::value_type tmp;
+                typename ExtIterator::value_type tmp;
                 tmp = (*temp_vector)[j];
                 out << tmp;
             }
@@ -339,7 +340,7 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
 
         // free bucket
         delete buckets[i];
-        space_left += BlockSize_;
+        space_left += BlockSize;
     }
 
     delete[] buckets;
@@ -347,10 +348,10 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
     // leave part of the block after _end untouched
     if (last.block_offset())
     {
-        ExtIterator_ _last_block_end = last + (block_type::size - last.block_offset());
-        for ( ; _cur != _last_block_end; ++_cur)
+        ExtIterator last_block_end = last + (block_type::size - last.block_offset());
+        for ( ; _cur != last_block_end; ++_cur)
         {
-            typename ExtIterator_::value_type tmp;
+            typename ExtIterator::value_type tmp;
             in >> tmp;
             out << tmp;
         }
@@ -361,12 +362,15 @@ void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_,
 //! \param first begin of the range to shuffle
 //! \param last end of the range to shuffle
 //! \param M number of bytes for internal use
-template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
-          unsigned BlockSize_, typename PgTp_, unsigned PageSize_>
+template <typename Type, typename AllocStrategy, typename SizeType, typename DiffType,
+          unsigned BlockSize, typename PageType, unsigned PageSize>
 inline
-void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
-                    stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last,
-                    unsigned_type M)
+void random_shuffle(
+    stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
+                           BlockSize, PageType, PageSize> first,
+    stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
+                           BlockSize, PageType, PageSize> last,
+    unsigned_type M)
 {
     stxxl::random_number<> rand;
     stxxl::random_shuffle(first, last, rand, M);
diff --git a/include/stxxl/bits/algo/run_cursor.h b/include/stxxl/bits/algo/run_cursor.h
index 32ca09d..7948b33 100644
--- a/include/stxxl/bits/algo/run_cursor.h
+++ b/include/stxxl/bits/algo/run_cursor.h
@@ -17,18 +17,17 @@
 #include <cstdlib>
 #include <stxxl/bits/common/types.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <typename block_type>
+template <typename BlockType>
 struct run_cursor
 {
     unsigned_type pos;
-    block_type* buffer;
+    BlockType* buffer;
 
     run_cursor() : pos(0), buffer(NULL) { }
 
-    inline typename block_type::const_reference current() const
+    inline typename BlockType::const_reference current() const
     {
         return (*buffer)[pos];
     }
@@ -40,7 +39,7 @@ struct run_cursor
 
 #ifdef STXXL_SORT_SINGLE_PREFETCHER
 
-template <typename must_be_void = void>
+template <typename MustBeVoid = void>
 struct have_prefetcher
 {
     static void* untyped_prefetcher;
@@ -48,14 +47,15 @@ struct have_prefetcher
 
 #endif
 
-template <typename block_type,
-          typename prefetcher_type_>
-struct run_cursor2 : public run_cursor<block_type>
+template <typename BlockType,
+          typename PrefetcherType>
+struct run_cursor2 : public run_cursor<BlockType>
 #ifdef STXXL_SORT_SINGLE_PREFETCHER
                      , public have_prefetcher<>
 #endif
 {
-    typedef prefetcher_type_ prefetcher_type;
+    typedef BlockType block_type;
+    typedef PrefetcherType prefetcher_type;
     typedef run_cursor2<block_type, prefetcher_type> _Self;
     typedef typename block_type::value_type value_type;
 
@@ -86,7 +86,16 @@ struct run_cursor2 : public run_cursor<block_type>
     {
         return (pos >= block_type::size);
     }
-    inline void operator ++ ();
+    inline void operator ++ ()
+    {
+        assert(!empty());
+        ++pos;
+        if (UNLIKELY(pos >= block_type::size))
+        {
+            if (prefetcher()->block_consumed(buffer))
+                pos = 0;
+        }
+    }
     inline void make_inf()
     {
         pos = block_type::size;
@@ -94,24 +103,10 @@ struct run_cursor2 : public run_cursor<block_type>
 };
 
 #ifdef STXXL_SORT_SINGLE_PREFETCHER
-template <typename must_be_void>
-void* have_prefetcher<must_be_void>::untyped_prefetcher = NULL;
+template <typename MustBeVoid>
+void* have_prefetcher<MustBeVoid>::untyped_prefetcher = NULL;
 #endif
 
-template <typename block_type,
-          typename prefetcher_type>
-void run_cursor2<block_type, prefetcher_type>::operator ++ ()
-{
-    assert(!empty());
-    ++pos;
-    if (UNLIKELY(pos >= block_type::size))
-    {
-        if (prefetcher()->block_consumed(buffer))
-            pos = 0;
-    }
-}
-
-
 #if 0
 template <typename block_type>
 struct run_cursor_cmp
@@ -127,6 +122,5 @@ struct run_cursor_cmp
 
 STXXL_END_NAMESPACE
 
-
 #endif // !STXXL_ALGO_RUN_CURSOR_HEADER
 // vim: et:ts=4:sw=4
diff --git a/include/stxxl/bits/algo/scan.h b/include/stxxl/bits/algo/scan.h
index 40dc7be..d769860 100644
--- a/include/stxxl/bits/algo/scan.h
+++ b/include/stxxl/bits/algo/scan.h
@@ -20,14 +20,13 @@
 #include <stxxl/bits/mng/buf_istream.h>
 #include <stxxl/bits/mng/buf_ostream.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup stlalgo
 //! \{
 
 /*!
- * \brief External equivalent of std::for_each, see \ref design_algo_foreach.
+ * External equivalent of std::for_each, see \ref design_algo_foreach.
  *
  * stxxl::for_each applies the function object \c functor to each element in
  * the range [first, last); \c functor's return value, if any, is
@@ -48,12 +47,18 @@ STXXL_BEGIN_NAMESPACE
  * \warning nested stxxl::for_each are not supported
  */
 template <typename ExtIterator, typename UnaryFunction>
-UnaryFunction for_each(ExtIterator begin, ExtIterator end, UnaryFunction functor, int_type nbuffers = 0)
+UnaryFunction for_each(ExtIterator begin, ExtIterator end,
+                       UnaryFunction functor, int_type nbuffers = 0)
 {
     if (begin == end)
         return functor;
 
-    typedef buf_istream<typename ExtIterator::block_type, typename ExtIterator::bids_container_iterator> buf_istream_type;
+    typedef typename ExtIterator::value_type value_type;
+
+    typedef buf_istream<
+            typename ExtIterator::block_type,
+            typename ExtIterator::bids_container_iterator
+            > buf_istream_type;
 
     begin.flush();     // flush container
 
@@ -68,14 +73,14 @@ UnaryFunction for_each(ExtIterator begin, ExtIterator end, UnaryFunction functor
     // leave part of the block before begin untouched (e.g. copy)
     for ( ; cur != begin; ++cur)
     {
-        typename ExtIterator::value_type tmp;
+        value_type tmp;
         in >> tmp;
     }
 
     // apply functor to the range [begin,end)
     for ( ; cur != end; ++cur)
     {
-        typename ExtIterator::value_type tmp;
+        value_type tmp;
         in >> tmp;
         functor(tmp);
     }
@@ -83,10 +88,10 @@ UnaryFunction for_each(ExtIterator begin, ExtIterator end, UnaryFunction functor
     // leave part of the block after end untouched
     if (end.block_offset())
     {
-        ExtIterator _last_block_end = end - end.block_offset() + ExtIterator::block_type::size;
-        for ( ; cur != _last_block_end; ++cur)
+        ExtIterator last_block_end = end - end.block_offset() + ExtIterator::block_type::size;
+        for ( ; cur != last_block_end; ++cur)
         {
-            typename ExtIterator::value_type tmp;
+            value_type tmp;
             in >> tmp;
         }
     }
@@ -94,9 +99,8 @@ UnaryFunction for_each(ExtIterator begin, ExtIterator end, UnaryFunction functor
     return functor;
 }
 
-
 /*!
- * \brief External equivalent of std::for_each (mutating), see \ref design_algo_foreachm
+ * External equivalent of std::for_each (mutating), see \ref design_algo_foreachm
  *
  * stxxl::for_each_m applies the function object \c functor to each element in
  * the range [first, last); \c functor's return value, if any, is
@@ -118,13 +122,23 @@ UnaryFunction for_each(ExtIterator begin, ExtIterator end, UnaryFunction functor
  * \warning nested stxxl::for_each_m are not supported
  */
 template <typename ExtIterator, typename UnaryFunction>
-UnaryFunction for_each_m(ExtIterator begin, ExtIterator end, UnaryFunction functor, int_type nbuffers = 0)
+UnaryFunction for_each_m(ExtIterator begin, ExtIterator end,
+                         UnaryFunction functor, int_type nbuffers = 0)
 {
     if (begin == end)
         return functor;
 
-    typedef buf_istream<typename ExtIterator::block_type, typename ExtIterator::bids_container_iterator> buf_istream_type;
-    typedef buf_ostream<typename ExtIterator::block_type, typename ExtIterator::bids_container_iterator> buf_ostream_type;
+    typedef typename ExtIterator::value_type value_type;
+
+    typedef buf_istream<
+            typename ExtIterator::block_type,
+            typename ExtIterator::bids_container_iterator
+            > buf_istream_type;
+
+    typedef buf_ostream<
+            typename ExtIterator::block_type,
+            typename ExtIterator::bids_container_iterator
+            > buf_ostream_type;
 
     begin.flush();     // flush container
 
@@ -143,7 +157,7 @@ UnaryFunction for_each_m(ExtIterator begin, ExtIterator end, UnaryFunction funct
     // leave part of the block before begin untouched (e.g. copy)
     for ( ; cur != begin; ++cur)
     {
-        typename ExtIterator::value_type tmp;
+        value_type tmp;
         in >> tmp;
         out << tmp;
     }
@@ -151,7 +165,7 @@ UnaryFunction for_each_m(ExtIterator begin, ExtIterator end, UnaryFunction funct
     // apply functor to the range [begin,end)
     for ( ; cur != end; ++cur)
     {
-        typename ExtIterator::value_type tmp;
+        value_type tmp;
         in >> tmp;
         functor(tmp);
         out << tmp;
@@ -163,7 +177,7 @@ UnaryFunction for_each_m(ExtIterator begin, ExtIterator end, UnaryFunction funct
         ExtIterator _last_block_end = end - end.block_offset() + ExtIterator::block_type::size;
         for ( ; cur != _last_block_end; ++cur)
         {
-            typename ExtIterator::value_type tmp;
+            value_type tmp;
             in >> tmp;
             out << tmp;
         }
@@ -172,9 +186,8 @@ UnaryFunction for_each_m(ExtIterator begin, ExtIterator end, UnaryFunction funct
     return functor;
 }
 
-
 /*!
- * \brief External equivalent of std::generate, see \ref design_algo_generate.
+ * External equivalent of std::generate, see \ref design_algo_generate.
  *
  * Generate assigns the result of invoking \c generator, a function object that
  * takes no arguments, to each element in the range [first, last). To overlap
@@ -191,11 +204,14 @@ UnaryFunction for_each_m(ExtIterator begin, ExtIterator end, UnaryFunction funct
  * \param nbuffers number of buffers (blocks) for internal use (should be at least 2*D, or zero for automaticl 2*D)
  */
 template <typename ExtIterator, typename Generator>
-void generate(ExtIterator begin, ExtIterator end, Generator generator, int_type nbuffers = 0)
+void generate(ExtIterator begin, ExtIterator end,
+              Generator generator, int_type nbuffers = 0)
 {
     typedef typename ExtIterator::block_type block_type;
-    typedef buf_ostream<block_type, typename ExtIterator::bids_container_iterator> buf_ostream_type;
 
+    typedef buf_ostream<
+            block_type, typename ExtIterator::bids_container_iterator
+            > buf_ostream_type;
 
     while (begin.block_offset())    //  go to the beginning of the block
     //  of the external vector
@@ -250,9 +266,8 @@ void generate(ExtIterator begin, ExtIterator end, Generator generator, int_type
     begin.flush();
 }
 
-
 /*!
- * \brief External equivalent of std::find, see \ref design_algo_find.
+ * External equivalent of std::find, see \ref design_algo_find.
  *
  * Returns the first iterator \a i in the range [first, last) such that <tt>*i
  * == value</tt>. Returns last if no such iterator exists.  To overlap I/O and
@@ -271,12 +286,16 @@ void generate(ExtIterator begin, ExtIterator end, Generator generator, int_type
  *         such exists then \c end
  */
 template <typename ExtIterator, typename EqualityComparable>
-ExtIterator find(ExtIterator begin, ExtIterator end, const EqualityComparable& value, int_type nbuffers = 0)
+ExtIterator find(ExtIterator begin, ExtIterator end,
+                 const EqualityComparable& value, int_type nbuffers = 0)
 {
     if (begin == end)
         return end;
 
-    typedef buf_istream<typename ExtIterator::block_type, typename ExtIterator::bids_container_iterator> buf_istream_type;
+    typedef buf_istream<
+            typename ExtIterator::block_type,
+            typename ExtIterator::bids_container_iterator
+            > buf_istream_type;
 
     begin.flush();     // flush container
 
diff --git a/include/stxxl/bits/algo/sort.h b/include/stxxl/bits/algo/sort.h
index 4301621..f1dabe9 100644
--- a/include/stxxl/bits/algo/sort.h
+++ b/include/stxxl/bits/algo/sort.h
@@ -39,22 +39,21 @@
 #include <stxxl/bits/parallel.h>
 #include <stxxl/bits/common/is_sorted.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup stlalgo
 //! \{
 
-
 /*! \internal
  */
 namespace sort_local {
 
-template <typename block_type, typename bid_type>
+template <typename BlockType, typename BidType>
 struct read_next_after_write_completed
 {
+    typedef BlockType block_type;
     block_type* block;
-    bid_type bid;
+    BidType bid;
     request_ptr* req;
     void operator () (request* /*completed_req*/)
     {
@@ -62,20 +61,22 @@ struct read_next_after_write_completed
     }
 };
 
-
 template <
-    typename block_type,
-    typename run_type,
-    typename input_bid_iterator,
-    typename value_cmp>
+    typename BlockType,
+    typename RunType,
+    typename InputBidIterator,
+    typename ValueCmp>
 void
 create_runs(
-    input_bid_iterator it,
-    run_type** runs,
+    InputBidIterator it,
+    RunType** runs,
     int_type nruns,
     int_type _m,
-    value_cmp cmp)
+    ValueCmp cmp)
 {
+    typedef BlockType block_type;
+    typedef RunType run_type;
+
     typedef typename block_type::bid_type bid_type;
     STXXL_VERBOSE1("stxxl::create_runs nruns=" << nruns << " m=" << _m);
 
@@ -103,7 +104,7 @@ create_runs(
 
     for (i = 0; i < run_size; ++i)
     {
-        STXXL_VERBOSE1("stxxl::create_runs posting read " << long(Blocks1[i].elem));
+        STXXL_VERBOSE1("stxxl::create_runs posting read " << Blocks1[i].elem);
         bids1[i] = *(it++);
         read_reqs1[i] = Blocks1[i].read(bids1[i]);
     }
@@ -112,7 +113,7 @@ create_runs(
 
     for (i = 0; i < run_size; ++i)
     {
-        STXXL_VERBOSE1("stxxl::create_runs posting read " << long(Blocks2[i].elem));
+        STXXL_VERBOSE1("stxxl::create_runs posting read " << Blocks2[i].elem);
         bids2[i] = *(it++);
         read_reqs2[i] = Blocks2[i].read(bids2[i]);
     }
@@ -122,10 +123,8 @@ create_runs(
         run_type* run = runs[k];
         run_size = run->size();
         assert(run_size == m2);
-            #ifndef NDEBUG
         int_type next_run_size = runs[k + 1]->size();
-            #endif
-        assert((next_run_size == m2) || (next_run_size <= m2 && k == nruns - 2));
+        STXXL_ASSERT((next_run_size == m2) || (next_run_size <= m2 && k == nruns - 2));
 
         STXXL_VERBOSE1("stxxl::create_runs start waiting read_reqs1");
         wait_all(read_reqs1, run_size);
@@ -147,7 +146,7 @@ create_runs(
         int_type runplus2size = (k < nruns - 2) ? runs[k + 2]->size() : 0;
         for (i = 0; i < m2; ++i)
         {
-            STXXL_VERBOSE1("stxxl::create_runs posting write " << long(Blocks1[i].elem));
+            STXXL_VERBOSE1("stxxl::create_runs posting write " << Blocks1[i].elem);
             (*run)[i].value = Blocks1[i][0];
             if (i >= runplus2size) {
                 write_reqs[i] = Blocks1[i].write((*run)[i].bid);
@@ -185,7 +184,7 @@ create_runs(
 
     for (i = 0; i < run_size; ++i)
     {
-        STXXL_VERBOSE1("stxxl::create_runs posting write " << long(Blocks1[i].elem));
+        STXXL_VERBOSE1("stxxl::create_runs posting write " << Blocks1[i].elem);
         (*run)[i].value = Blocks1[i][0];
         write_reqs[i] = Blocks1[i].write((*run)[i].bid);
     }
@@ -204,13 +203,13 @@ create_runs(
     delete[] next_run_reads;
 }
 
-
-template <typename block_type, typename run_type, typename value_cmp>
-bool check_sorted_runs(run_type** runs,
+template <typename BlockType, typename RunType, typename ValueCmp>
+bool check_sorted_runs(RunType** runs,
                        unsigned_type nruns,
                        unsigned_type m,
-                       value_cmp cmp)
+                       ValueCmp cmp)
 {
+    typedef BlockType block_type;
     typedef typename block_type::value_type value_type;
 
     STXXL_MSG("check_sorted_runs  Runs: " << nruns);
@@ -304,10 +303,13 @@ bool check_sorted_runs(run_type** runs,
     return true;
 }
 
-
-template <typename block_type, typename run_type, typename value_cmp>
-void merge_runs(run_type** in_runs, int_type nruns, run_type* out_run, unsigned_type _m, value_cmp cmp)
+template <typename BlockType, typename RunType, typename ValueCmp>
+void merge_runs(RunType** in_runs, int_type nruns,
+                RunType* out_run, unsigned_type _m, ValueCmp cmp)
 {
+    typedef BlockType block_type;
+    typedef RunType run_type;
+    typedef ValueCmp value_cmp;
     typedef typename run_type::value_type trigger_entry_type;
     typedef block_prefetcher<block_type, typename run_type::iterator> prefetcher_type;
     typedef run_cursor2<block_type, prefetcher_type> run_cursor_type;
@@ -320,7 +322,7 @@ void merge_runs(run_type** in_runs, int_type nruns, run_type* out_run, unsigned_
     typename run_type::iterator copy_start = consume_seq.begin();
     for (int_type i = 0; i < nruns; i++)
     {
-        // TODO: try to avoid copy
+        // \todo: try to avoid copy
         copy_start = std::copy(
             in_runs[i]->begin(),
             in_runs[i]->end(),
@@ -348,7 +350,7 @@ void merge_runs(run_type** in_runs, int_type nruns, run_type* out_run, unsigned_
         consume_seq,
         prefetch_seq,
         n_opt_prefetch_buffers,
-        disks_number);
+        config::get_instance()->get_max_device_id());
 #else
     for (unsigned_type i = 0; i < out_run->size(); i++)
         prefetch_seq[i] = i;
@@ -499,23 +501,24 @@ void merge_runs(run_type** in_runs, int_type nruns, run_type* out_run, unsigned_
         for (unsigned_type j = 0; j < sz; ++j)
             bm->delete_block((*in_runs[i])[j].bid);
 
-
         delete in_runs[i];
     }
 }
 
-
-template <typename block_type,
-          typename alloc_strategy,
-          typename input_bid_iterator,
-          typename value_cmp>
-simple_vector<sort_helper::trigger_entry<block_type> >*
-sort_blocks(input_bid_iterator input_bids,
+template <typename BlockType,
+          typename AllocStrategy,
+          typename InputBidIterator,
+          typename ValueCmp>
+simple_vector<sort_helper::trigger_entry<BlockType> >*
+sort_blocks(InputBidIterator input_bids,
             unsigned_type _n,
             unsigned_type _m,
-            value_cmp cmp
-            )
+            ValueCmp cmp)
 {
+    typedef BlockType block_type;
+    typedef AllocStrategy alloc_strategy;
+    typedef InputBidIterator input_bid_iterator;
+    typedef ValueCmp value_cmp;
     typedef typename block_type::bid_type bid_type;
     typedef sort_helper::trigger_entry<block_type> trigger_entry_type;
     typedef simple_vector<trigger_entry_type> run_type;
@@ -538,7 +541,6 @@ sort_blocks(input_bid_iterator input_bids,
     for (i = 0; i < full_runs; i++)
         runs[i] = new run_type(m2);
 
-
     if (partial_runs)
         runs[i] = new run_type(_n - full_runs * m2);
 
@@ -643,7 +645,6 @@ sort_blocks(input_bid_iterator input_bids,
                   after_runs_creation - begin << " s");
     STXXL_VERBOSE("Time in I/O wait(rf): " << io_wait_after_rf << " s");
     STXXL_VERBOSE(*stats::get_instance());
-    STXXL_UNUSED(begin + after_runs_creation + end + io_wait_after_rf);
 
     return result;
 }
@@ -651,7 +652,7 @@ sort_blocks(input_bid_iterator input_bids,
 } // namespace sort_local
 
 /*!
- * \brief Sort records comparison-based, see \ref design_algo_sort.
+ * Sort records comparison-based, see \ref design_algo_sort.
  *
  * stxxl::sort sorts the elements in [first, last) into ascending order,
  * meaning that if \c i and \c j are any two valid iterators in [first, last)
@@ -674,10 +675,13 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 {
     sort_helper::verify_sentinel_strict_weak_ordering(cmp);
 
-    typedef simple_vector<sort_helper::trigger_entry<typename ExtIterator::block_type> > run_type;
-
     typedef typename ExtIterator::vector_type::value_type value_type;
     typedef typename ExtIterator::block_type block_type;
+    typedef typename ExtIterator::bid_type bid_type;
+    typedef typename ExtIterator::vector_type::alloc_strategy_type alloc_strategy_type;
+    typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
+
+    typedef simple_vector<sort_helper::trigger_entry<block_type> > run_type;
 
     unsigned_type n = 0;
 
@@ -700,8 +704,8 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
             if (last.block_offset())              // first and last element are
             // not the first elements of their block
             {
-                typename ExtIterator::block_type * first_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * last_block = new typename ExtIterator::block_type;
+                block_type* first_block = new block_type;
+                block_type* last_block = new block_type;
                 typename ExtIterator::bid_type first_bid, last_bid;
                 request_ptr req;
 
@@ -710,7 +714,6 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
                 mng->new_block(FR(), last_bid);
                 req->wait();
 
-
                 req = last_block->read(*last.bid());
 
                 unsigned_type i = 0;
@@ -721,7 +724,6 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 req->wait();
 
-
                 req = first_block->write(first_bid);
                 for (i = last.block_offset(); i < block_type::size; ++i)
                 {
@@ -730,7 +732,6 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 req->wait();
 
-
                 req = last_block->write(last_bid);
 
                 n = last.bid() - first.bid() + 1;
@@ -740,22 +741,19 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 req->wait();
 
-
                 delete first_block;
                 delete last_block;
 
                 run_type* out =
                     sort_local::sort_blocks<
-                        typename ExtIterator::block_type,
-                        typename ExtIterator::vector_type::alloc_strategy_type,
-                        typename ExtIterator::bids_container_iterator>
-                        (first.bid(), n, M / sort_memory_usage_factor() / block_type::raw_size, cmp);
-
-
-                first_block = new typename ExtIterator::block_type;
-                last_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * sorted_first_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * sorted_last_block = new typename ExtIterator::block_type;
+                        block_type, alloc_strategy_type, bids_container_iterator
+                        >(first.bid(), n,
+                          M / sort_memory_usage_factor() / block_type::raw_size, cmp);
+
+                first_block = new block_type;
+                last_block = new block_type;
+                block_type* sorted_first_block = new block_type;
+                block_type* sorted_last_block = new block_type;
                 request_ptr* reqs = new request_ptr[2];
 
                 reqs[0] = first_block->read(first_bid);
@@ -794,7 +792,7 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 typename run_type::iterator it = out->begin();
                 ++it;
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
                 ++cur_bid;
 
                 for ( ; cur_bid != last.bid(); ++cur_bid, ++it)
@@ -810,22 +808,20 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 req->wait();
 
-
                 delete last_block;
             }
             else
             {
                 // first element is
                 // not the first element of its block
-                typename ExtIterator::block_type * first_block = new typename ExtIterator::block_type;
-                typename ExtIterator::bid_type first_bid;
+                block_type* first_block = new block_type;
+                bid_type first_bid;
                 request_ptr req;
 
                 req = first_block->read(*first.bid());
                 mng->new_block(FR(), first_bid);                // try to overlap
                 req->wait();
 
-
                 unsigned_type i = 0;
                 for ( ; i < first.block_offset(); ++i)
                 {
@@ -840,20 +836,17 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 req->wait();
 
-
                 delete first_block;
 
                 run_type* out =
                     sort_local::sort_blocks<
-                        typename ExtIterator::block_type,
-                        typename ExtIterator::vector_type::alloc_strategy_type,
-                        typename ExtIterator::bids_container_iterator>
-                        (first.bid(), n, M / sort_memory_usage_factor() / block_type::raw_size, cmp);
-
+                        block_type, alloc_strategy_type, bids_container_iterator
+                        >(first.bid(), n,
+                          M / sort_memory_usage_factor() / block_type::raw_size, cmp);
 
-                first_block = new typename ExtIterator::block_type;
+                first_block = new block_type;
 
-                typename ExtIterator::block_type * sorted_first_block = new typename ExtIterator::block_type;
+                block_type* sorted_first_block = new block_type;
 
                 request_ptr* reqs = new request_ptr[2];
 
@@ -876,7 +869,7 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 typename run_type::iterator it = out->begin();
                 ++it;
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
                 ++cur_bid;
 
                 for ( ; cur_bid != last.bid(); ++cur_bid, ++it)
@@ -900,8 +893,8 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
             if (last.block_offset())            // last is
             // not the first element of its block
             {
-                typename ExtIterator::block_type * last_block = new typename ExtIterator::block_type;
-                typename ExtIterator::bid_type last_bid;
+                block_type* last_block = new block_type;
+                bid_type last_bid;
                 request_ptr req;
                 unsigned_type i;
 
@@ -909,7 +902,6 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
                 mng->new_block(FR(), last_bid);
                 req->wait();
 
-
                 for (i = last.block_offset(); i < block_type::size; ++i)
                 {
                     last_block->elem[i] = cmp.max_value();
@@ -923,19 +915,16 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
 
                 req->wait();
 
-
                 delete last_block;
 
                 run_type* out =
                     sort_local::sort_blocks<
-                        typename ExtIterator::block_type,
-                        typename ExtIterator::vector_type::alloc_strategy_type,
-                        typename ExtIterator::bids_container_iterator>
-                        (first.bid(), n, M / sort_memory_usage_factor() / block_type::raw_size, cmp);
-
+                        block_type, alloc_strategy_type, bids_container_iterator
+                        >(first.bid(), n,
+                          M / sort_memory_usage_factor() / block_type::raw_size, cmp);
 
-                last_block = new typename ExtIterator::block_type;
-                typename ExtIterator::block_type * sorted_last_block = new typename ExtIterator::block_type;
+                last_block = new block_type;
+                block_type* sorted_last_block = new block_type;
                 request_ptr* reqs = new request_ptr[2];
 
                 reqs[0] = last_block->read(last_bid);
@@ -956,7 +945,7 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
                 *last.bid() = last_bid;
 
                 typename run_type::iterator it = out->begin();
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
 
                 for ( ; cur_bid != last.bid(); ++cur_bid, ++it)
                 {
@@ -977,13 +966,13 @@ void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_
                 n = last.bid() - first.bid();
 
                 run_type* out =
-                    sort_local::sort_blocks<typename ExtIterator::block_type,
-                                            typename ExtIterator::vector_type::alloc_strategy_type,
-                                            typename ExtIterator::bids_container_iterator>
-                        (first.bid(), n, M / sort_memory_usage_factor() / block_type::raw_size, cmp);
+                    sort_local::sort_blocks<
+                        block_type, alloc_strategy_type, bids_container_iterator
+                        >(first.bid(), n,
+                          M / sort_memory_usage_factor() / block_type::raw_size, cmp);
 
                 typename run_type::iterator it = out->begin();
-                typename ExtIterator::bids_container_iterator cur_bid = first.bid();
+                bids_container_iterator cur_bid = first.bid();
 
                 for ( ; cur_bid != last.bid(); ++cur_bid, ++it)
                 {
diff --git a/include/stxxl/bits/algo/sort_base.h b/include/stxxl/bits/algo/sort_base.h
index cb82efb..c059207 100644
--- a/include/stxxl/bits/algo/sort_base.h
+++ b/include/stxxl/bits/algo/sort_base.h
@@ -16,7 +16,6 @@
 #include <cmath>
 #include <stxxl/bits/common/types.h>
 
-
 #ifndef STXXL_NO_WARN_RECURSIVE_SORT
 #define STXXL_WARNMSG_RECURSIVE_SORT STXXL_ERRMSG
 #else
diff --git a/include/stxxl/bits/algo/sort_helper.h b/include/stxxl/bits/algo/sort_helper.h
index f582501..524e40d 100644
--- a/include/stxxl/bits/algo/sort_helper.h
+++ b/include/stxxl/bits/algo/sort_helper.h
@@ -19,7 +19,6 @@
 #include <stxxl/bits/algo/run_cursor.h>
 #include <stxxl/bits/verbose.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \internal
@@ -28,19 +27,18 @@ namespace sort_helper {
 template <typename StrictWeakOrdering>
 inline void verify_sentinel_strict_weak_ordering(StrictWeakOrdering cmp)
 {
-    assert(!cmp(cmp.min_value(), cmp.min_value()));
-    assert(cmp(cmp.min_value(), cmp.max_value()));
-    assert(!cmp(cmp.max_value(), cmp.min_value()));
-    assert(!cmp(cmp.max_value(), cmp.max_value()));
-    STXXL_UNUSED(cmp);
+    STXXL_ASSERT(!cmp(cmp.min_value(), cmp.min_value()));
+    STXXL_ASSERT(cmp(cmp.min_value(), cmp.max_value()));
+    STXXL_ASSERT(!cmp(cmp.max_value(), cmp.min_value()));
+    STXXL_ASSERT(!cmp(cmp.max_value(), cmp.max_value()));
 }
 
-template <typename BlockTp_, typename ValTp_ = typename BlockTp_::value_type>
+template <typename BlockType, typename ValueType = typename BlockType::value_type>
 struct trigger_entry
 {
-    typedef BlockTp_ block_type;
+    typedef BlockType block_type;
     typedef typename block_type::bid_type bid_type;
-    typedef ValTp_ value_type;
+    typedef ValueType value_type;
 
     bid_type bid;
     value_type value;
@@ -51,12 +49,13 @@ struct trigger_entry
     }
 };
 
-template <typename TriggerEntryTp_, typename ValueCmp_>
-struct trigger_entry_cmp : public std::binary_function<TriggerEntryTp_, TriggerEntryTp_, bool>
+template <typename TriggerEntryType, typename ValueCmp>
+struct trigger_entry_cmp
+    : public std::binary_function<TriggerEntryType, TriggerEntryType, bool>
 {
-    typedef TriggerEntryTp_ trigger_entry_type;
-    ValueCmp_ cmp;
-    trigger_entry_cmp(ValueCmp_ c) : cmp(c) { }
+    typedef TriggerEntryType trigger_entry_type;
+    ValueCmp cmp;
+    trigger_entry_cmp(ValueCmp c) : cmp(c) { }
     trigger_entry_cmp(const trigger_entry_cmp& a) : cmp(a.cmp) { }
     bool operator () (const trigger_entry_type& a, const trigger_entry_type& b) const
     {
@@ -64,11 +63,20 @@ struct trigger_entry_cmp : public std::binary_function<TriggerEntryTp_, TriggerE
     }
 };
 
-template <typename block_type,
-          typename prefetcher_type,
-          typename value_cmp>
-struct run_cursor2_cmp : public std::binary_function<run_cursor2<block_type, prefetcher_type>, run_cursor2<block_type, prefetcher_type>, bool>
+template <typename BlockType,
+          typename PrefetcherType,
+          typename ValueCmp>
+struct run_cursor2_cmp
+    : public std::binary_function<
+          run_cursor2<BlockType, PrefetcherType>,
+          run_cursor2<BlockType, PrefetcherType>,
+          bool
+          >
 {
+    typedef BlockType block_type;
+    typedef PrefetcherType prefetcher_type;
+    typedef ValueCmp value_cmp;
+
     typedef run_cursor2<block_type, prefetcher_type> cursor_type;
     value_cmp cmp;
 
@@ -89,8 +97,9 @@ struct run_cursor2_cmp : public std::binary_function<run_cursor2<block_type, pre
 
 // this function is used by parallel mergers
 template <typename SequenceVector, typename ValueType, typename Comparator>
-inline
-unsigned_type count_elements_less_equal(const SequenceVector& seqs, const ValueType& bound, Comparator cmp)
+inline unsigned_type
+count_elements_less_equal(const SequenceVector& seqs,
+                          const ValueType& bound, Comparator cmp)
 {
     typedef typename SequenceVector::size_type seqs_size_type;
     typedef typename SequenceVector::value_type::first_type iterator;
@@ -108,10 +117,10 @@ unsigned_type count_elements_less_equal(const SequenceVector& seqs, const ValueT
 
 // this function is used by parallel mergers
 template <typename SequenceVector, typename BufferPtrVector, typename Prefetcher>
-inline
-void refill_or_remove_empty_sequences(SequenceVector& seqs,
-                                      BufferPtrVector& buffers,
-                                      Prefetcher& prefetcher)
+inline void
+refill_or_remove_empty_sequences(SequenceVector& seqs,
+                                 BufferPtrVector& buffers,
+                                 Prefetcher& prefetcher)
 {
     typedef typename SequenceVector::size_type seqs_size_type;
 
diff --git a/include/stxxl/bits/algo/stable_ksort.h b/include/stxxl/bits/algo/stable_ksort.h
index 8acb13b..bd05e8a 100644
--- a/include/stxxl/bits/algo/stable_ksort.h
+++ b/include/stxxl/bits/algo/stable_ksort.h
@@ -17,7 +17,6 @@
 // it is a first try: distribution sort without sampling
 // I rework the stable_ksort when I would have a time
 
-
 #include <stxxl/bits/mng/block_manager.h>
 #include <stxxl/bits/mng/buf_istream.h>
 #include <stxxl/bits/mng/buf_ostream.h>
@@ -26,12 +25,10 @@
 #include <stxxl/bits/algo/sort_base.h>
 #include <stxxl/bits/common/utils.h>
 
-
 #ifndef STXXL_VERBOSE_STABLE_KSORT
 #define STXXL_VERBOSE_STABLE_KSORT STXXL_VERBOSE1
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup stlalgo
@@ -41,51 +38,52 @@ STXXL_BEGIN_NAMESPACE
  */
 namespace stable_ksort_local {
 
-template <class type_, class type_key>
-void classify_block(type_* begin, type_* end, type_key*& out, int_type* bucket, unsigned_type offset, unsigned shift)
+template <class Type, class TypeKey>
+void classify_block(Type* begin, Type* end, TypeKey*& out,
+                    int_type* bucket, typename Type::key_type offset,
+                    unsigned shift)
 {
-    for (type_* p = begin; p < end; p++, out++)      // count & create references
+    for (Type* p = begin; p < end; p++, out++)      // count & create references
     {
         out->ptr = p;
-        typename type_::key_type key = p->key();
-        int_type ibucket = (key - offset) >> shift;
+        typename Type::key_type key = p->key();
+        int_type ibucket = (int_type)((key - offset) >> shift);
         out->key = key;
         bucket[ibucket]++;
     }
 }
 
-template <typename type>
+template <typename Type>
 struct type_key
 {
-    typedef typename type::key_type key_type;
+    typedef typename Type::key_type key_type;
     key_type key;
-    type* ptr;
+    Type* ptr;
 
     type_key() { }
-    type_key(key_type k, type* p) : key(k), ptr(p)
+    type_key(key_type k, Type* p) : key(k), ptr(p)
     { }
 };
 
-template <typename type>
-bool operator < (const type_key<type>& a, const type_key<type>& b)
+template <typename Type>
+bool operator < (const type_key<Type>& a, const type_key<Type>& b)
 {
     return a.key < b.key;
 }
 
-template <typename type>
-bool operator > (const type_key<type>& a, const type_key<type>& b)
+template <typename Type>
+bool operator > (const type_key<Type>& a, const type_key<Type>& b)
 {
     return a.key > b.key;
 }
 
-
-template <typename BIDType_, typename AllocStrategy_>
+template <typename BIDType, typename AllocStrategy>
 class bid_sequence
 {
 public:
-    typedef BIDType_ bid_type;
+    typedef BIDType bid_type;
     typedef bid_type& reference;
-    typedef AllocStrategy_ alloc_strategy;
+    typedef AllocStrategy alloc_strategy;
     typedef typename simple_vector<bid_type>::size_type size_type;
     typedef typename simple_vector<bid_type>::iterator iterator;
 
@@ -130,23 +128,24 @@ public:
     }
 };
 
-template <typename ExtIterator_>
+template <typename ExtIterator>
 void distribute(
-    bid_sequence<typename ExtIterator_::vector_type::block_type::bid_type,
-                 typename ExtIterator_::vector_type::alloc_strategy_type>* bucket_bids,
+    bid_sequence<typename ExtIterator::vector_type::block_type::bid_type,
+                 typename ExtIterator::vector_type::alloc_strategy_type>* bucket_bids,
     int64* bucket_sizes,
     const int_type nbuckets,
     const int_type lognbuckets,
-    ExtIterator_ first,
-    ExtIterator_ last,
+    ExtIterator first,
+    ExtIterator last,
     const int_type nread_buffers,
     const int_type nwrite_buffers)
 {
-    typedef typename ExtIterator_::vector_type::value_type value_type;
+    typedef typename ExtIterator::vector_type::value_type value_type;
     typedef typename value_type::key_type key_type;
-    typedef typename ExtIterator_::block_type block_type;
-    typedef buf_istream<typename ExtIterator_::block_type,
-                        typename ExtIterator_::bids_container_iterator> buf_istream_type;
+    typedef typename ExtIterator::block_type block_type;
+    typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
+
+    typedef buf_istream<block_type, bids_container_iterator> buf_istream_type;
 
     int_type i = 0;
 
@@ -168,21 +167,19 @@ void distribute(
     for (i = 0; i < nbuckets; i++)
         bucket_blocks[i] = out.get_free_block();
 
-
-    ExtIterator_ cur = first - first.block_offset();
+    ExtIterator cur = first - first.block_offset();
 
     // skip part of the block before first untouched
     for ( ; cur != first; cur++)
         ++in;
 
-
     const int_type shift = sizeof(key_type) * 8 - lognbuckets;
     // search in the the range [_begin,_end)
     STXXL_VERBOSE_STABLE_KSORT("Shift by: " << shift << " bits, lognbuckets: " << lognbuckets);
     for ( ; cur != last; cur++)
     {
         key_type cur_key = in.current().key();
-        int_type ibucket = cur_key >> shift;
+        int_type ibucket = (int_type)(cur_key >> shift);
 
         int_type block_offset = bucket_block_offsets[ibucket];
         in >> (bucket_blocks[ibucket]->elem[block_offset++]);
@@ -220,15 +217,16 @@ void distribute(
 //! \remark Elements must provide a method key() which returns the integer key.
 //! \remark Not yet fully implemented, it assumes that the keys are uniformly
 //! distributed between [0,std::numeric_limits<key_type>::max().
-template <typename ExtIterator_>
-void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
+template <typename ExtIterator>
+void stable_ksort(ExtIterator first, ExtIterator last, unsigned_type M)
 {
     STXXL_MSG("Warning: stable_ksort is not yet fully implemented, it assumes that the keys are uniformly distributed between [0,std::numeric_limits<key_type>::max()]");
-    typedef typename ExtIterator_::vector_type::value_type value_type;
+    typedef typename ExtIterator::vector_type::value_type value_type;
     typedef typename value_type::key_type key_type;
-    typedef typename ExtIterator_::block_type block_type;
+    typedef typename ExtIterator::block_type block_type;
+    typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
     typedef typename block_type::bid_type bid_type;
-    typedef typename ExtIterator_::vector_type::alloc_strategy_type alloc_strategy;
+    typedef typename ExtIterator::vector_type::alloc_strategy_type alloc_strategy;
     typedef stable_ksort_local::bid_sequence<bid_type, alloc_strategy> bucket_bids_type;
     typedef stable_ksort_local::type_key<value_type> type_key_;
 
@@ -247,7 +245,7 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
     const unsigned_type nmaxbuckets = m - min_num_read_write_buffers;
     const unsigned int lognbuckets = ilog2_floor(nmaxbuckets);
     const unsigned_type nbuckets = unsigned_type(1) << lognbuckets;
-    const uint64 est_bucket_size = div_ceil((last - first) / nbuckets, block_type::size);      //in blocks
+    const unsigned_type est_bucket_size = (unsigned_type)div_ceil((last - first) / nbuckets, block_type::size);      //in blocks
 
     if (m < min_num_read_write_buffers + 2 || nbuckets < 2) {
         STXXL_ERRMSG("stxxl::stable_ksort: Not enough memory. Blocks available: " << m <<
@@ -305,7 +303,7 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
         }
         // here we can increase write_buffers_multiple_b knowing max(bucket_sizes[i])
         // ... and decrease max_bucket_size_bl
-        const int_type max_bucket_size_act_bl = div_ceil(max_bucket_size_act, block_type::size);
+        const int_type max_bucket_size_act_bl = (int_type)div_ceil(max_bucket_size_act, block_type::size);
         STXXL_VERBOSE_STABLE_KSORT("Reducing required number of required blocks per bucket from " <<
                                    max_bucket_size_bl << " to " << max_bucket_size_act_bl);
         max_bucket_size_rec = max_bucket_size_act;
@@ -313,7 +311,7 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
         const unsigned_type nwrite_buffers_bs = m - 2 * max_bucket_size_bl;
         STXXL_VERBOSE_STABLE_KSORT("Write buffers in bucket sorting phase: " << nwrite_buffers_bs);
 
-        typedef buf_ostream<block_type, typename ExtIterator_::bids_container_iterator> buf_ostream_type;
+        typedef buf_ostream<block_type, bids_container_iterator> buf_ostream_type;
         buf_ostream_type out(first.bid(), nwrite_buffers_bs);
 
         disk_queues::get_instance()->set_priority_op(request_queue::READ);
@@ -336,20 +334,18 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
         block_type* blocks2 = new block_type[max_bucket_size_bl];
         request_ptr* reqs1 = new request_ptr[max_bucket_size_bl];
         request_ptr* reqs2 = new request_ptr[max_bucket_size_bl];
-        type_key_* refs1 = new type_key_[max_bucket_size_rec];
-        type_key_* refs2 = new type_key_[max_bucket_size_rec];
+        type_key_* refs1 = new type_key_[(size_t)max_bucket_size_rec];
+        type_key_* refs2 = new type_key_[(size_t)max_bucket_size_rec];
 
         // submit reading first 2 buckets (Peter's scheme)
-        unsigned_type nbucket_blocks = div_ceil(bucket_sizes[0], block_type::size);
+        unsigned_type nbucket_blocks = (unsigned_type)div_ceil(bucket_sizes[0], block_type::size);
         for (i = 0; i < nbucket_blocks; i++)
             reqs1[i] = blocks1[i].read(bucket_bids[0][i]);
 
-
-        nbucket_blocks = div_ceil(bucket_sizes[1], block_type::size);
+        nbucket_blocks = (unsigned_type)div_ceil(bucket_sizes[1], block_type::size);
         for (i = 0; i < nbucket_blocks; i++)
             reqs2[i] = blocks2[i].read(bucket_bids[1][i]);
 
-
         key_type offset = 0;
         const unsigned log_k1 = STXXL_MAX<unsigned>(ilog2_ceil(max_bucket_size_rec * sizeof(type_key_) / STXXL_L2_SIZE), 1);
         unsigned_type k1 = unsigned_type(1) << log_k1;
@@ -363,7 +359,7 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
 
         for (unsigned_type k = 0; k < nbuckets; k++)
         {
-            nbucket_blocks = div_ceil(bucket_sizes[k], block_type::size);
+            nbucket_blocks = (unsigned_type)div_ceil(bucket_sizes[k], block_type::size);
             const unsigned log_k1_k = STXXL_MAX<unsigned>(ilog2_ceil(bucket_sizes[k] * sizeof(type_key_) / STXXL_L2_SIZE), 1);
             assert(log_k1_k <= log_k1);
             k1 = (unsigned_type)(1) << log_k1_k;
@@ -377,15 +373,18 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
             for (i = 0; i < nbucket_blocks - 1; i++)
             {
                 reqs1[i]->wait();
-                stable_ksort_local::classify_block(blocks1[i].begin(), blocks1[i].end(), ref_ptr, bucket1, offset1, shift1 /*,k1*/);
+                stable_ksort_local::classify_block(blocks1[i].begin(), blocks1[i].end(),
+                                                   ref_ptr, bucket1, offset1, shift1 /*,k1*/);
             }
             // last block might be non-full
-            const unsigned_type last_block_size = bucket_sizes[k] - int64(nbucket_blocks - 1) * block_type::size;
+            const unsigned_type last_block_size =
+                (unsigned_type)(bucket_sizes[k] - (nbucket_blocks - 1) * block_type::size);
             reqs1[i]->wait();
 
             //STXXL_MSG("block_type::size: "<<block_type::size<<" last_block_size:"<<last_block_size);
 
-            classify_block(blocks1[i].begin(), blocks1[i].begin() + last_block_size, ref_ptr, bucket1, offset1, shift1);
+            classify_block(blocks1[i].begin(), blocks1[i].begin() + last_block_size,
+                           ref_ptr, bucket1, offset1, shift1);
 
             exclusive_prefix_sum(bucket1, k1);
             classify(refs1, refs1 + bucket_sizes[k], refs2, bucket1, offset1, shift1);
@@ -411,7 +410,6 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
                 for (type_key_* p = d; p < dEnd; p++)
                     out << (*(p->ptr));
 
-
                 delete[] bucket2;
                 c = cEnd;
                 d = dEnd;
@@ -420,7 +418,7 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
             const unsigned_type bucket2submit = k + 2;
             if (bucket2submit < nbuckets)
             {
-                nbucket_blocks = div_ceil(bucket_sizes[bucket2submit], block_type::size);
+                nbucket_blocks = (unsigned_type)div_ceil(bucket_sizes[bucket2submit], block_type::size);
                 for (i = 0; i < nbucket_blocks; i++)
                     reqs1[i] = blocks1[i].read(bucket_bids[bucket2submit][i]);
             }
@@ -460,7 +458,6 @@ void stable_ksort(ExtIterator_ first, ExtIterator_ last, unsigned_type M)
                   dist_end - begin << " s");
     STXXL_VERBOSE("Time in I/O wait(ds): " << io_wait_after_d << " s");
     STXXL_VERBOSE(*stats::get_instance());
-    STXXL_UNUSED(begin + dist_end + end + io_wait_after_d);
 }
 
 //! \}
diff --git a/include/stxxl/bits/common/addressable_queues.h b/include/stxxl/bits/common/addressable_queues.h
index 1ea390c..733f10d 100644
--- a/include/stxxl/bits/common/addressable_queues.h
+++ b/include/stxxl/bits/common/addressable_queues.h
@@ -21,22 +21,23 @@
 
 STXXL_BEGIN_NAMESPACE
 
-//! An internal fifo queue that allows removing elements addressed with (a copy of) themselves.
+//! An internal fifo queue that allows removing elements addressed with (a copy
+//! of) themselves.
 //! \tparam KeyType Type of contained elements.
 template <typename KeyType>
 class addressable_fifo_queue
 {
-    typedef std::list<KeyType> container_t;
-    typedef typename container_t::iterator container_iter_t;
-    typedef std::map<KeyType, container_iter_t> meta_t;
-    typedef typename meta_t::iterator meta_iter_t;
+    typedef std::list<KeyType> container_type;
+    typedef typename container_type::iterator container_iterator;
+    typedef std::map<KeyType, container_iterator> meta_type;
+    typedef typename meta_type::iterator meta_iterator;
 
-    container_t vals;
-    meta_t meta;
+    container_type vals;
+    meta_type meta;
 
 public:
     //! Type of handle to an entry. For use with insert and remove.
-    typedef meta_iter_t handle;
+    typedef meta_iterator handle;
 
     //! Create an empty queue.
     addressable_fifo_queue() { }
@@ -47,12 +48,14 @@ public:
     bool empty() const
     { return vals.empty(); }
 
-    //! Insert new element. If the element is already in, it is moved to the back.
+    //! Insert new element. If the element is already in, it is moved to the
+    //! back.
     //! \param e Element to insert.
-    //! \return pair<handle, bool> Iterator to element; if element was newly inserted.
+    //! \return pair<handle, bool> Iterator to element; whether the element was
+    //! newly inserted.
     std::pair<handle, bool> insert(const KeyType& e)
     {
-        container_iter_t ei = vals.insert(vals.end(), e);
+        container_iterator ei = vals.insert(vals.end(), e);
         std::pair<handle, bool> r = meta.insert(std::make_pair(e, ei));
         if (! r.second)
         {
@@ -101,7 +104,8 @@ public:
     }
 };
 
-//! An internal priority queue that allows removing elements addressed with (a copy of) themselves.
+//! An internal priority queue that allows removing elements addressed with (a
+//! copy of) themselves.
 //! \tparam KeyType Type of contained elements.
 //! \tparam PriorityType Type of Priority.
 template <typename KeyType, typename PriorityType, class Cmp = std::less<PriorityType> >
@@ -118,17 +122,17 @@ class addressable_priority_queue
         }
     };
 
-    typedef std::set<std::pair<PriorityType, KeyType>, cmp> container_t;
-    typedef typename container_t::iterator container_iter_t;
-    typedef std::map<KeyType, container_iter_t> meta_t;
-    typedef typename meta_t::iterator meta_iter_t;
+    typedef std::set<std::pair<PriorityType, KeyType>, cmp> container_type;
+    typedef typename container_type::iterator container_iterator;
+    typedef std::map<KeyType, container_iterator> meta_type;
+    typedef typename meta_type::iterator meta_iterator;
 
-    container_t vals;
-    meta_t meta;
+    container_type vals;
+    meta_type meta;
 
 public:
     //! Type of handle to an entry. For use with insert and remove.
-    typedef meta_iter_t handle;
+    typedef meta_iterator handle;
 
     //! Create an empty queue.
     addressable_priority_queue() { }
@@ -145,7 +149,7 @@ public:
     //! \return pair<handle, bool> Iterator to element; if element was newly inserted.
     std::pair<handle, bool> insert(const KeyType& e, const PriorityType o)
     {
-        std::pair<container_iter_t, bool> s = vals.insert(std::make_pair(o, e));
+        std::pair<container_iterator, bool> s = vals.insert(std::make_pair(o, e));
         std::pair<handle, bool> r = meta.insert(std::make_pair(e, s.first));
         if (! r.second && s.second)
         {
diff --git a/include/stxxl/bits/common/aligned_alloc.h b/include/stxxl/bits/common/aligned_alloc.h
index 366572e..8de0875 100644
--- a/include/stxxl/bits/common/aligned_alloc.h
+++ b/include/stxxl/bits/common/aligned_alloc.h
@@ -19,20 +19,19 @@
 #include <stxxl/bits/verbose.h>
 #include <stxxl/bits/common/utils.h>
 
-
 #ifndef STXXL_VERBOSE_ALIGNED_ALLOC
 #define STXXL_VERBOSE_ALIGNED_ALLOC STXXL_VERBOSE2
 #endif
 
 STXXL_BEGIN_NAMESPACE
 
-template <typename must_be_int>
+template <typename MustBeInt>
 struct aligned_alloc_settings {
     static bool may_use_realloc;
 };
 
-template <typename must_be_int>
-bool aligned_alloc_settings<must_be_int>::may_use_realloc = true;
+template <typename MustBeInt>
+bool aligned_alloc_settings<MustBeInt>::may_use_realloc = true;
 
 // meta_info_size > 0 is needed for array allocations that have overhead
 //
@@ -45,17 +44,17 @@ bool aligned_alloc_settings<must_be_int>::may_use_realloc = true;
 //                     pointer to buffer
 // (---) unallocated, (===) allocated memory
 
-template <size_t ALIGNMENT>
+template <size_t Alignment>
 inline void * aligned_alloc(size_t size, size_t meta_info_size = 0)
 {
-    STXXL_VERBOSE2("stxxl::aligned_alloc<" << ALIGNMENT << ">(), size = " << size << ", meta info size = " << meta_info_size);
+    STXXL_VERBOSE2("stxxl::aligned_alloc<" << Alignment << ">(), size = " << size << ", meta info size = " << meta_info_size);
 #if !defined(STXXL_WASTE_MORE_MEMORY_FOR_IMPROVED_ACCESS_AFTER_ALLOCATED_MEMORY_CHECKS)
     // malloc()/realloc() variant that frees the unused amount of memory
     // after the data area of size 'size'. realloc() from valgrind does not
     // preserve the old memory area when shrinking, so out-of-bounds
     // accesses can't be detected easily.
-    // Overhead: about ALIGNMENT bytes.
-    size_t alloc_size = ALIGNMENT + sizeof(char*) + meta_info_size + size;
+    // Overhead: about Alignment bytes.
+    size_t alloc_size = Alignment + sizeof(char*) + meta_info_size + size;
     char* buffer = (char*)std::malloc(alloc_size);
 #else
     // More space consuming and memory fragmenting variant using
@@ -64,11 +63,11 @@ inline void * aligned_alloc(size_t size, size_t meta_info_size = 0)
     // block, so no corrections are neccessary and
     // access-behind-allocated-memory problems can be easily detected by
     // valgrind. Usually produces an extra memory fragment of about
-    // ALIGNMENT bytes.
-    // Overhead: about 2 * ALIGNMENT bytes.
-    size_t alloc_size = ALIGNMENT * div_ceil(sizeof(char*) + meta_info_size, ALIGNMENT) + size;
+    // Alignment bytes.
+    // Overhead: about 2 * Alignment bytes.
+    size_t alloc_size = Alignment * div_ceil(sizeof(char*) + meta_info_size, Alignment) + size;
     char* buffer;
-    if (posix_memalign((void**)&buffer, ALIGNMENT, alloc_size) != 0)
+    if (posix_memalign((void**)&buffer, Alignment, alloc_size) != 0)
         throw std::bad_alloc();
 #endif
     if (buffer == NULL)
@@ -77,9 +76,9 @@ inline void * aligned_alloc(size_t size, size_t meta_info_size = 0)
     memset(buffer, 0, alloc_size);
     #endif
     char* reserve_buffer = buffer + sizeof(char*) + meta_info_size;
-    char* result = reserve_buffer + ALIGNMENT -
-                   (((unsigned_type)reserve_buffer) % (ALIGNMENT)) - meta_info_size;
-    STXXL_VERBOSE2("stxxl::aligned_alloc<" << ALIGNMENT << ">() address " << (void*)result << " lost " << (result - buffer) << " bytes");
+    char* result = reserve_buffer + Alignment -
+                   (((unsigned_type)reserve_buffer) % (Alignment)) - meta_info_size;
+    STXXL_VERBOSE2("stxxl::aligned_alloc<" << Alignment << ">() address " << (void*)result << " lost " << (result - buffer) << " bytes");
     //-tb: check that there is space for one char* before the "result" pointer
     // delivered to the user. this char* is set below to the beginning of the
     // allocated area.
@@ -96,28 +95,31 @@ inline void * aligned_alloc(size_t size, size_t meta_info_size = 0)
             STXXL_ERRMSG("stxxl::aligned_alloc: disabling realloc()");
             std::free(realloced);
             aligned_alloc_settings<int>::may_use_realloc = false;
-            return aligned_alloc<ALIGNMENT>(size, meta_info_size);
+            return aligned_alloc<Alignment>(size, meta_info_size);
         }
         assert(result + size <= buffer + realloc_size);
     }
 
     *(((char**)result) - 1) = buffer;
-    STXXL_VERBOSE2("stxxl::aligned_alloc<" << ALIGNMENT << ">(), allocated at " << (void*)buffer << " returning " << (void*)result);
-    STXXL_VERBOSE_ALIGNED_ALLOC("stxxl::aligned_alloc<" << ALIGNMENT <<
-                                ">(size = " << size << ", meta info size = " << meta_info_size <<
-                                ") => buffer = " << (void*)buffer << ", ptr = " << (void*)result);
+    STXXL_VERBOSE2(
+        "stxxl::aligned_alloc<" << Alignment << ">(), allocated at " <<
+        (void*)buffer << " returning " << (void*)result);
+    STXXL_VERBOSE_ALIGNED_ALLOC(
+        "stxxl::aligned_alloc<" << Alignment <<
+        ">(size = " << size << ", meta info size = " << meta_info_size <<
+        ") => buffer = " << (void*)buffer << ", ptr = " << (void*)result);
 
     return result;
 }
 
-template <size_t ALIGNMENT>
+template <size_t Alignment>
 inline void
 aligned_dealloc(void* ptr)
 {
     if (!ptr)
         return;
     char* buffer = *(((char**)ptr) - 1);
-    STXXL_VERBOSE_ALIGNED_ALLOC("stxxl::aligned_dealloc<" << ALIGNMENT << ">(), ptr = " << ptr << ", buffer = " << (void*)buffer);
+    STXXL_VERBOSE_ALIGNED_ALLOC("stxxl::aligned_dealloc<" << Alignment << ">(), ptr = " << ptr << ", buffer = " << (void*)buffer);
     std::free(buffer);
 }
 
diff --git a/include/stxxl/bits/common/binary_buffer.h b/include/stxxl/bits/common/binary_buffer.h
new file mode 100644
index 0000000..9cc2289
--- /dev/null
+++ b/include/stxxl/bits/common/binary_buffer.h
@@ -0,0 +1,650 @@
+/***************************************************************************
+ *  include/stxxl/bits/common/binary_buffer.h
+ *
+ *  Classes binary_buffer and binary_reader to construct data blocks with
+ *  variable length content. Programs construct blocks using
+ *  binary_buffer::put<type>() and read them using
+ *  binary_reader::get<type>(). The operation sequences should match.
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_COMMON_BINARY_BUFFER_HEADER
+#define STXXL_COMMON_BINARY_BUFFER_HEADER
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <stxxl/bits/namespace.h>
+#include <stxxl/bits/common/types.h>
+
+STXXL_BEGIN_NAMESPACE
+
+//! \addtogroup support
+//! \{
+
+/*!
+ * binary_buffer represents a dynamically growable area of memory, which can be
+ * modified by appending integral data types via put() and other basic
+ * operations.
+ */
+class binary_buffer
+{
+protected:
+    //! Allocated buffer pointer.
+    char* m_data;
+
+    //! Size of valid data.
+    size_t m_size;
+
+    //! Total capacity of buffer.
+    size_t m_capacity;
+
+public:
+    //! Create a new empty object
+    inline binary_buffer()
+        : m_data(NULL), m_size(0), m_capacity(0)
+    { }
+
+    //! Copy-Constructor, duplicates memory content.
+    inline binary_buffer(const binary_buffer& other)
+        : m_data(NULL), m_size(0), m_capacity(0)
+    {
+        assign(other);
+    }
+
+    //! Constructor, copy memory area.
+    inline binary_buffer(const void* data, size_t n)
+        : m_data(NULL), m_size(0), m_capacity(0)
+    {
+        assign(data, n);
+    }
+
+    //! Constructor, create object with n bytes pre-allocated.
+    inline binary_buffer(size_t n)
+        : m_data(NULL), m_size(0), m_capacity(0)
+    {
+        alloc(n);
+    }
+
+    //! Constructor from std::string, copies string content.
+    inline binary_buffer(const std::string& str)
+        : m_data(NULL), m_size(0), m_capacity(0)
+    {
+        assign(str.data(), str.size());
+    }
+
+    //! Destroys the memory space.
+    inline ~binary_buffer()
+    {
+        dealloc();
+    }
+
+    //! Return a pointer to the currently kept memory area.
+    inline const char * data() const
+    {
+        return m_data;
+    }
+
+    //! Return a writeable pointer to the currently kept memory area.
+    inline char * data()
+    {
+        return m_data;
+    }
+
+    //! Return the currently used length in bytes.
+    inline size_t size() const
+    {
+        return m_size;
+    }
+
+    //! Return the currently allocated buffer capacity.
+    inline size_t capacity() const
+    {
+        return m_capacity;
+    }
+
+    //! Explicit conversion to std::string (copies memory of course).
+    inline std::string str() const
+    {
+        return std::string(reinterpret_cast<const char*>(m_data), m_size);
+    }
+
+    //! Set the number of valid bytes in the buffer; use this if the buffer
+    //! was filled directly.
+    inline binary_buffer & set_size(size_t n)
+    {
+        assert(n <= m_capacity);
+        m_size = n;
+
+        return *this;
+    }
+
+    //! Make sure that at least n bytes are allocated.
+    inline binary_buffer & alloc(size_t n)
+    {
+        if (m_capacity < n)
+        {
+            m_capacity = n;
+            m_data = static_cast<char*>(realloc(m_data, m_capacity));
+        }
+
+        return *this;
+    }
+
+    //! Deallocates the kept memory space (we use dealloc() instead of free()
+    //! as a name, because sometimes "free" is replaced by the preprocessor)
+    inline binary_buffer & dealloc()
+    {
+        if (m_data) free(m_data);
+        m_data = NULL;
+        m_size = m_capacity = 0;
+
+        return *this;
+    }
+
+    //! Detach the memory from the object, returns the memory pointer.
+    inline const char * detach()
+    {
+        const char* data = m_data;
+        m_data = NULL;
+        m_size = m_capacity = 0;
+        return data;
+    }
+
+    //! Clears the memory contents, does not deallocate the memory.
+    inline binary_buffer & clear()
+    {
+        m_size = 0;
+        return *this;
+    }
+
+    //! Copy a memory range into the buffer, overwrites all current
+    //! data. Roughly equivalent to clear() followed by append().
+    inline binary_buffer & assign(const void* data, size_t len)
+    {
+        if (len > m_capacity) alloc(len);
+
+        memcpy(m_data, data, len);
+        m_size = len;
+
+        return *this;
+    }
+
+    //! Copy the contents of another buffer object into this buffer, overwrites
+    //! all current data. Roughly equivalent to clear() followed by append().
+    inline binary_buffer & assign(const binary_buffer& other)
+    {
+        if (&other != this)
+            assign(other.data(), other.size());
+
+        return *this;
+    }
+
+    //! Assignment operator: copy other's memory range into buffer.
+    inline binary_buffer& operator = (const binary_buffer& other)
+    {
+        if (&other != this)
+            assign(other.data(), other.size());
+
+        return *this;
+    }
+
+    //! Align the size of the buffer to a multiple of n. Fills up with 0s.
+    inline binary_buffer & align(size_t n)
+    {
+        assert(n > 0);
+        size_t rem = m_size % n;
+        if (rem != 0)
+        {
+            size_t add = n - rem;
+            if (m_size + add > m_capacity) dynalloc(m_size + add);
+            memset(m_data + m_size, 0, add);
+            m_size += add;
+        }
+        assert((m_size % n) == 0);
+
+        return *this;
+    }
+
+    //! Dynamically allocate more memory. At least n bytes will be available,
+    //! probably more to compensate future growth.
+    inline binary_buffer & dynalloc(size_t n)
+    {
+        if (m_capacity < n)
+        {
+            // place to adapt the buffer growing algorithm as needed.
+            size_t newsize = m_capacity;
+
+            while (newsize < n) {
+                if (newsize < 256) newsize = 512;
+                else if (newsize < 1024 * 1024) newsize = 2 * newsize;
+                else newsize += 1024 * 1024;
+            }
+
+            alloc(newsize);
+        }
+
+        return *this;
+    }
+
+    // *** Appending Write Functions ***
+
+    //! Append a memory range to the buffer
+    inline binary_buffer & append(const void* data, size_t len)
+    {
+        if (m_size + len > m_capacity) dynalloc(m_size + len);
+
+        memcpy(m_data + m_size, data, len);
+        m_size += len;
+
+        return *this;
+    }
+
+    //! Append the contents of a different buffer object to this one.
+    inline binary_buffer & append(const class binary_buffer& bb)
+    {
+        return append(bb.data(), bb.size());
+    }
+
+    //! Append the contents of a std::string, excluding the null (which isn't
+    //! contained in the string size anyway).
+    inline binary_buffer & append(const std::string& s)
+    {
+        return append(s.data(), s.size());
+    }
+
+    //! Put (append) a single item of the template type Type to the buffer. Be
+    //! careful with implicit type conversions!
+    template <typename Type>
+    inline binary_buffer & put(const Type item)
+    {
+        if (m_size + sizeof(Type) > m_capacity) dynalloc(m_size + sizeof(Type));
+
+        *reinterpret_cast<Type*>(m_data + m_size) = item;
+        m_size += sizeof(Type);
+
+        return *this;
+    }
+
+    //! Append a varint to the buffer.
+    inline binary_buffer & put_varint(uint32 v)
+    {
+        if (v < 128) {
+            put<uint8>(uint8(v));
+        }
+        else if (v < 128 * 128) {
+            put<uint8>((uint8)(((v >> 0) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 7) & 0x7F));
+        }
+        else if (v < 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 0) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 7) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 14) & 0x7F));
+        }
+        else if (v < 128 * 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 0) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 7) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 21) & 0x7F));
+        }
+        else {
+            put<uint8>((uint8)(((v >> 0) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 7) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 21) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 28) & 0x7F));
+        }
+
+        return *this;
+    }
+
+    //! Append a varint to the buffer.
+    inline binary_buffer & put_varint(int v)
+    {
+        return put_varint((uint32)v);
+    }
+
+    //! Append a varint to the buffer.
+    inline binary_buffer & put_varint(uint64 v)
+    {
+        if (v < 128) {
+            put<uint8>(uint8(v));
+        }
+        else if (v < 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 07) & 0x7F));
+        }
+        else if (v < 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 14) & 0x7F));
+        }
+        else if (v < 128 * 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 21) & 0x7F));
+        }
+        else if (v < ((uint64)128) * 128 * 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 21) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 28) & 0x7F));
+        }
+        else if (v < ((uint64)128) * 128 * 128 * 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 21) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 28) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 35) & 0x7F));
+        }
+        else if (v < ((uint64)128) * 128 * 128 * 128 * 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 21) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 28) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 35) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 42) & 0x7F));
+        }
+        else if (v < ((uint64)128) * 128 * 128 * 128 * 128 * 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 21) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 28) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 35) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 42) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 49) & 0x7F));
+        }
+        else if (v < ((uint64)128) * 128 * 128 * 128 * 128 * 128 * 128 * 128 * 128) {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 21) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 28) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 35) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 42) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 49) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 56) & 0x7F));
+        }
+        else {
+            put<uint8>((uint8)(((v >> 00) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 07) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 14) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 21) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 28) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 35) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 42) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 49) & 0x7F) | 0x80));
+            put<uint8>((uint8)(((v >> 56) & 0x7F) | 0x80));
+            put<uint8>((uint8)((v >> 63) & 0x7F));
+        }
+
+        return *this;
+    }
+
+    //! Put a string by saving its length followed by the data itself.
+    inline binary_buffer & put_string(const char* data, size_t len)
+    {
+        return put_varint((uint32)len).append(data, len);
+    }
+
+    //! Put a string by saving its length followed by the data itself.
+    inline binary_buffer & put_string(const std::string& str)
+    {
+        return put_string(str.data(), str.size());
+    }
+
+    //! Put a binary_buffer by saving its length followed by the data itself.
+    inline binary_buffer & put_string(const binary_buffer& bb)
+    {
+        return put_string(bb.data(), bb.size());
+    }
+};
+
+/*!
+ * binary_buffer_ref represents a memory area as pointer and valid length. It
+ * is not deallocated or otherwise managed. This class can be used to pass
+ * around references to binary_buffer objects.
+ */
+class binary_buffer_ref
+{
+protected:
+    //! Allocated buffer pointer.
+    const char* m_data;
+
+    //! Size of valid data.
+    size_t m_size;
+
+public:
+    //! Constructor, assign memory area from binary_buffer.
+    binary_buffer_ref(const binary_buffer& bb)
+        : m_data(bb.data()), m_size(bb.size())
+    { }
+
+    //! Constructor, assign memory area from pointer and length.
+    binary_buffer_ref(const void* data, size_t n)
+        : m_data(reinterpret_cast<const char*>(data)), m_size(n)
+    { }
+
+    //! Constructor, assign memory area from string, does NOT copy.
+    inline binary_buffer_ref(const std::string& str)
+        : m_data(str.data()), m_size(str.size())
+    { }
+
+    //! Return a pointer to the currently kept memory area.
+    const void * data() const
+    { return m_data; }
+
+    //! Return the currently valid length in bytes.
+    size_t size() const
+    { return m_size; }
+
+    //! Explicit conversion to std::string (copies memory of course).
+    inline std::string str() const
+    { return std::string(reinterpret_cast<const char*>(m_data), m_size); }
+
+    //! Compare contents of two binary_buffer_refs.
+    bool operator == (const binary_buffer_ref& br) const
+    {
+        if (m_size != br.m_size) return false;
+        return memcmp(m_data, br.m_data, m_size) == 0;
+    }
+
+    //! Compare contents of two binary_buffer_refs.
+    bool operator != (const binary_buffer_ref& br) const
+    {
+        if (m_size != br.m_size) return true;
+        return memcmp(m_data, br.m_data, m_size) != 0;
+    }
+};
+
+/*!
+ * binary_reader represents a binary_buffer_ref with an additional cursor with which
+ * the memory can be read incrementally.
+ */
+class binary_reader : public binary_buffer_ref
+{
+protected:
+    //! Current read cursor
+    size_t m_curr;
+
+public:
+    //! Constructor, assign memory area from binary_buffer_ref.
+    inline binary_reader(const binary_buffer_ref& br)
+        : binary_buffer_ref(br), m_curr(0)
+    { }
+
+    //! Constructor, assign memory area from pointer and length.
+    inline binary_reader(const void* data, size_t n)
+        : binary_buffer_ref(data, n), m_curr(0)
+    { }
+
+    //! Constructor, assign memory area from string, does NOT copy.
+    inline binary_reader(const std::string& str)
+        : binary_buffer_ref(str), m_curr(0)
+    { }
+
+    //! Return the current read cursor.
+    inline size_t curr() const
+    {
+        return m_curr;
+    }
+
+    //! Reset the read cursor.
+    inline binary_reader & rewind()
+    {
+        m_curr = 0;
+        return *this;
+    }
+
+    //! Check that n bytes are available at the cursor. Compares n against the
+    //! remaining length instead of computing m_curr + n, which can wrap around
+    //! for very large n; skip/read/get only advance m_curr after this check.
+    inline bool cursor_available(size_t n) const
+    { return (n <= m_size - m_curr); }
+
+    //! Throws a std::underflow_error unless n bytes are available at the
+    //! cursor.
+    inline void check_available(size_t n) const
+    {
+        if (!cursor_available(n))
+            throw (std::underflow_error("binary_reader underrun"));
+    }
+
+    //! Return true if the cursor is at the end of the buffer.
+    inline bool empty() const
+    {
+        return (m_curr == m_size);
+    }
+
+    //! Advance the cursor given number of bytes without reading them.
+    inline binary_reader & skip(size_t n)
+    {
+        check_available(n);
+        m_curr += n;
+
+        return *this;
+    }
+
+    //! Fetch a number of unstructured bytes from the buffer, advancing the
+    //! cursor.
+    inline binary_reader & read(void* outdata, size_t datalen)
+    {
+        check_available(datalen);
+        memcpy(outdata, m_data + m_curr, datalen);
+        m_curr += datalen;
+
+        return *this;
+    }
+
+    //! Fetch a number of unstructured bytes from the buffer as std::string,
+    //! advancing the cursor.
+    inline std::string read(size_t datalen)
+    {
+        check_available(datalen);
+        std::string out(m_data + m_curr, datalen);
+        m_curr += datalen;
+        return out;
+    }
+
+    //! Fetch a single item of the template type Type from the buffer,
+    //! advancing the cursor. Be careful with implicit type conversions!
+    template <typename Type>
+    inline Type get()
+    {
+        check_available(sizeof(Type));
+
+        Type ret = *reinterpret_cast<const Type*>(m_data + m_curr);
+        m_curr += sizeof(Type);
+
+        return ret;
+    }
+
+    //! Fetch a varint with up to 32 bits from the buffer at the cursor.
+    inline uint32 get_varint()
+    {
+        uint32 u, v = get<uint8>();
+        if (!(v & 0x80)) return v;
+        v &= 0x7F;
+        u = get<uint8>(), v |= (u & 0x7F) << 7;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 14;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 21;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>();
+        if (u & 0xF0)
+            throw (std::overflow_error("Overflow during varint decoding."));
+        v |= (u & 0x7F) << 28;
+        return v;
+    }
+
+    //! Fetch a 64-bit varint from the buffer at the cursor.
+    inline uint64 get_varint64()
+    {
+        uint64 u, v = get<uint8>();
+        if (!(v & 0x80)) return v;
+        v &= 0x7F;
+        u = get<uint8>(), v |= (u & 0x7F) << 7;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 14;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 21;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 28;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 35;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 42;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 49;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>(), v |= (u & 0x7F) << 56;
+        if (!(u & 0x80)) return v;
+        u = get<uint8>();
+        if (u & 0xFE)
+            throw (std::overflow_error("Overflow during varint64 decoding."));
+        v |= (u & 0x7F) << 63;
+        return v;
+    }
+
+    //! Fetch a string which was put via put_string().
+    inline std::string get_string()
+    {
+        uint32 len = get_varint();
+        return read(len);
+    }
+
+    //! Fetch a binary_buffer_ref to a binary string or blob which was put via
+    //! put_string(). Does NOT copy the data.
+    inline binary_buffer_ref get_binary_buffer_ref()
+    {
+        uint32 len = get_varint();
+        // save object
+        binary_buffer_ref br(m_data + m_curr, len);
+        // skip over sub block data
+        skip(len);
+        return br;
+    }
+};
+
+//! \}
+
+STXXL_END_NAMESPACE
+
+#endif // !STXXL_COMMON_BINARY_BUFFER_HEADER
diff --git a/include/stxxl/bits/common/cmdline.h b/include/stxxl/bits/common/cmdline.h
index 55bcf79..dbab92c 100644
--- a/include/stxxl/bits/common/cmdline.h
+++ b/include/stxxl/bits/common/cmdline.h
@@ -26,9 +26,12 @@
 
 STXXL_BEGIN_NAMESPACE
 
+//! \addtogroup support
+//! \{
+
 /**
- * \brief Command line parser which automatically fills variables and prints
- * nice usage messages.
+ * Command line parser which automatically fills variables and prints nice
+ * usage messages.
  *
  * This is a straightforward command line parser in C++, which will recognize
  * short options -s, long options --long and parameters, both required and
@@ -197,13 +200,46 @@ protected:
     };
 
     //! specialization of argument for SI/IEC suffixes byte size options or parameters
-    struct argument_bytes : public argument
+    struct argument_bytes32 : public argument
+    {
+        uint32& m_dest;
+
+        //! constructor filling most attributes
+        argument_bytes32(char key, const std::string& longkey, const std::string& keytype,
+                         const std::string& desc, bool required, uint32& dest)
+            : argument(key, longkey, keytype, desc, required),
+              m_dest(dest)
+        { }
+
+        virtual const char * type_name() const
+        { return "bytes"; }
+
+        //! parse byte size using SI/IEC parser from stxxl.
+        virtual bool process(int& argc, const char* const*& argv)
+        {
+            if (argc == 0) return false;
+            uint64 dest;
+            if (parse_SI_IEC_size(argv[0], dest) &&
+                (uint64)(m_dest = (uint32)dest) == dest) {
+                --argc, ++argv;
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        virtual void print_value(std::ostream& os) const
+        { os << m_dest; }
+    };
+
+    //! specialization of argument for SI/IEC suffixes byte size options or parameters
+    struct argument_bytes64 : public argument
     {
         uint64& m_dest;
 
         //! contructor filling most attributes
-        argument_bytes(char key, const std::string& longkey, const std::string& keytype,
-                       const std::string& desc, bool required, uint64& dest)
+        argument_bytes64(char key, const std::string& longkey, const std::string& keytype,
+                         const std::string& desc, bool required, uint64& dest)
             : argument(key, longkey, keytype, desc, required),
               m_dest(dest)
         { }
@@ -303,9 +339,9 @@ protected:
     arglist_type m_paramlist;
 
     //! formatting width for options, '-s, --switch <#>'
-    size_t m_opt_maxlong;
+    int m_opt_maxlong;
     //! formatting width for parameters, 'param <#>'
-    size_t m_param_maxlong;
+    int m_param_maxlong;
 
     //! argv[0] for usage.
     const char* m_progname;
@@ -328,13 +364,15 @@ private:
     //! update maximum formatting width for new option
     void calc_opt_max(const argument* arg)
     {
-        m_opt_maxlong = STXXL_MAX(arg->option_text().size() + 2, m_opt_maxlong);
+        m_opt_maxlong = STXXL_MAX((int)arg->option_text().size() + 2,
+                                  m_opt_maxlong);
     }
 
     //! update maximum formatting width for new parameter
     void calc_param_max(const argument* arg)
     {
-        m_param_maxlong = STXXL_MAX(arg->param_text().size() + 2, m_param_maxlong);
+        m_param_maxlong = STXXL_MAX((int)arg->param_text().size() + 2,
+                                    m_param_maxlong);
     }
 
 public:
@@ -416,10 +454,19 @@ public:
     }
 
     //! add SI/IEC suffixes byte size option -key, --longkey [keytype] and store to 64-bit dest
+    void add_bytes(char key, const std::string& longkey, const std::string& keytype, const std::string& desc, stxxl::uint32& dest)
+    {
+        m_optlist.push_back(
+            new argument_bytes32(key, longkey, keytype, desc, false, dest)
+            );
+        calc_opt_max(m_optlist.back());
+    }
+
+    //! add SI/IEC suffixes byte size option -key, --longkey [keytype] and store to 64-bit dest
     void add_bytes(char key, const std::string& longkey, const std::string& keytype, const std::string& desc, stxxl::uint64& dest)
     {
         m_optlist.push_back(
-            new argument_bytes(key, longkey, keytype, desc, false, dest)
+            new argument_bytes64(key, longkey, keytype, desc, false, dest)
             );
         calc_opt_max(m_optlist.back());
     }
@@ -454,6 +501,10 @@ public:
     void add_uint(char key, const std::string& longkey, const std::string& desc, unsigned int& dest)
     { return add_uint(key, longkey, "", desc, dest); }
 
+    //! add SI/IEC suffixes byte size option -key, --longkey [keytype] and store to 32-bit dest
+    void add_bytes(char key, const std::string& longkey, const std::string& desc, stxxl::uint32& dest)
+    { return add_bytes(key, longkey, "", desc, dest); }
+
     //! add SI/IEC suffixes byte size option -key, --longkey [keytype] and store to 64-bit dest
     void add_bytes(char key, const std::string& longkey, const std::string& desc, stxxl::uint64& dest)
     { return add_bytes(key, longkey, "", desc, dest); }
@@ -487,10 +538,19 @@ public:
     }
 
     //! add SI/IEC suffixes byte size parameter [name] with description and store to dest
+    void add_param_bytes(const std::string& name, const std::string& desc, uint32& dest)
+    {
+        m_paramlist.push_back(
+            new argument_bytes32(0, name, "", desc, true, dest)
+            );
+        calc_param_max(m_paramlist.back());
+    }
+
+    //! add SI/IEC suffixes byte size parameter [name] with description and store to dest
     void add_param_bytes(const std::string& name, const std::string& desc, uint64& dest)
     {
         m_paramlist.push_back(
-            new argument_bytes(0, name, "", desc, true, dest)
+            new argument_bytes64(0, name, "", desc, true, dest)
             );
         calc_param_max(m_paramlist.back());
     }
@@ -535,10 +595,19 @@ public:
     }
 
     //! add optional SI/IEC suffixes byte size parameter [name] with description and store to dest
+    void add_opt_param_bytes(const std::string& name, const std::string& desc, uint32& dest)
+    {
+        m_paramlist.push_back(
+            new argument_bytes32(0, name, "", desc, false, dest)
+            );
+        calc_param_max(m_paramlist.back());
+    }
+
+    //! add optional SI/IEC suffixes byte size parameter [name] with description and store to dest
     void add_opt_param_bytes(const std::string& name, const std::string& desc, uint64& dest)
     {
         m_paramlist.push_back(
-            new argument_bytes(0, name, "", desc, false, dest)
+            new argument_bytes64(0, name, "", desc, false, dest)
             );
         calc_param_max(m_paramlist.back());
     }
@@ -586,6 +655,8 @@ public:
     void print_result(std::ostream& os = std::cout);
 };
 
+//! \}
+
 STXXL_END_NAMESPACE
 
 #endif // !STXXL_COMMON_CMDLINE_HEADER
diff --git a/include/stxxl/bits/common/condition_variable.h b/include/stxxl/bits/common/condition_variable.h
index a1e9f1c..3ffcafc 100644
--- a/include/stxxl/bits/common/condition_variable.h
+++ b/include/stxxl/bits/common/condition_variable.h
@@ -31,7 +31,6 @@
  #error "Thread implementation not detected."
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 #if STXXL_STD_THREADS
diff --git a/include/stxxl/bits/common/counting_ptr.h b/include/stxxl/bits/common/counting_ptr.h
index 6beb2d0..6eab3fd 100644
--- a/include/stxxl/bits/common/counting_ptr.h
+++ b/include/stxxl/bits/common/counting_ptr.h
@@ -21,9 +21,11 @@
 #include <stxxl/bits/config.h>
 #include <stxxl/bits/common/mutex.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
+//! \addtogroup support
+//! \{
+
 /*!
  * High-performance smart pointer used as a wrapping reference counting
  * pointer.
@@ -516,6 +518,8 @@ public:
 
 #endif
 
+//! \}
+
 STXXL_END_NAMESPACE
 
 #endif // !STXXL_COMMON_COUNTING_PTR_HEADER
diff --git a/include/stxxl/bits/common/error_handling.h b/include/stxxl/bits/common/error_handling.h
index 22e26e6..534834f 100644
--- a/include/stxxl/bits/common/error_handling.h
+++ b/include/stxxl/bits/common/error_handling.h
@@ -26,7 +26,6 @@
 #include <stxxl/bits/common/exceptions.h>
 #include <stxxl/bits/config.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 #if STXXL_MSVC
diff --git a/include/stxxl/bits/common/exceptions.h b/include/stxxl/bits/common/exceptions.h
index 2a13a88..72d8f0c 100644
--- a/include/stxxl/bits/common/exceptions.h
+++ b/include/stxxl/bits/common/exceptions.h
@@ -20,66 +20,65 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 class io_error : public std::ios_base::failure
 {
 public:
-    io_error() throw () :
-        std::ios_base::failure("")
+    io_error() throw ()
+        : std::ios_base::failure("")
     { }
 
-    io_error(const std::string& msg_) throw () :
-        std::ios_base::failure(msg_)
+    io_error(const std::string& message) throw ()
+        : std::ios_base::failure(message)
     { }
 };
 
 class resource_error : public std::runtime_error
 {
 public:
-    resource_error() throw () :
-        std::runtime_error("")
+    resource_error() throw ()
+        : std::runtime_error("")
     { }
 
-    resource_error(const std::string& msg_) throw () :
-        std::runtime_error(msg_)
+    resource_error(const std::string& message) throw ()
+        : std::runtime_error(message)
     { }
 };
 
 class bad_ext_alloc : public std::runtime_error
 {
 public:
-    bad_ext_alloc() throw () :
-        std::runtime_error("")
+    bad_ext_alloc() throw ()
+        : std::runtime_error("")
     { }
 
-    bad_ext_alloc(const std::string& msg_) throw () :
-        std::runtime_error(msg_)
+    bad_ext_alloc(const std::string& message) throw ()
+        : std::runtime_error(message)
     { }
 };
 
 class bad_parameter : public std::runtime_error
 {
 public:
-    bad_parameter() throw () :
-        std::runtime_error("")
+    bad_parameter() throw ()
+        : std::runtime_error("")
     { }
 
-    bad_parameter(const std::string& msg_) throw () :
-        std::runtime_error(msg_)
+    bad_parameter(const std::string& message) throw ()
+        : std::runtime_error(message)
     { }
 };
 
 class unreachable : public std::runtime_error
 {
 public:
-    unreachable() throw () :
-        std::runtime_error("")
+    unreachable() throw ()
+        : std::runtime_error("")
     { }
 
-    unreachable(const std::string& msg_) throw () :
-        std::runtime_error(msg_)
+    unreachable(const std::string& message) throw ()
+        : std::runtime_error(message)
     { }
 };
 
diff --git a/include/stxxl/bits/common/exithandler.h b/include/stxxl/bits/common/exithandler.h
index eff32f8..d85f2eb 100644
--- a/include/stxxl/bits/common/exithandler.h
+++ b/include/stxxl/bits/common/exithandler.h
@@ -15,17 +15,19 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-// There are several possibilities for the exit handlers.
-// To use the default implementation (which uses atexit()), nothing special has to be done.
+// There are several possibilities for the exit handlers.  To use the default
+// implementation (which uses atexit()), nothing special has to be done.
 //
-// To work around problems with atexit() being used in a dll you may #define STXXL_NON_DEFAULT_EXIT_HANDLER at library compilation time.
-// In this case the library/application should call stxxl::run_exit_handlers() during shutdown.
+// To work around problems with atexit() being used in a dll you may #define
+// STXXL_NON_DEFAULT_EXIT_HANDLER at library compilation time.  In this case
+// the library/application should call stxxl::run_exit_handlers() during
+// shutdown.
 //
-// To provide your own exit handler implementation, #define STXXL_EXTERNAL_EXIT_HANDLER and implement
-// stxxl::register_exit_handler(void (*)(void)) and stxxl::run_exit_handlers() in your application.
+// To provide your own exit handler implementation, #define
+// STXXL_EXTERNAL_EXIT_HANDLER and implement stxxl::register_exit_handler(void
+// (*)(void)) and stxxl::run_exit_handlers() in your application.
 
 int register_exit_handler(void (* function)(void));
 void run_exit_handlers();
diff --git a/include/stxxl/bits/common/external_shared_ptr.h b/include/stxxl/bits/common/external_shared_ptr.h
new file mode 100644
index 0000000..d8e6594
--- /dev/null
+++ b/include/stxxl/bits/common/external_shared_ptr.h
@@ -0,0 +1,119 @@
+/***************************************************************************
+ *  include/stxxl/bits/common/external_shared_ptr.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Daniel Godas-Lopez <dgodas at gmail.com>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_COMMON_EXTERNAL_SHARED_PTR_HEADER
+#define STXXL_COMMON_EXTERNAL_SHARED_PTR_HEADER
+
+#include <stxxl/bits/namespace.h>
+#include <ostream>
+
+STXXL_BEGIN_NAMESPACE
+
+//! \addtogroup support
+//! \{
+
+/*!
+ * This class takes a shared pointer, increments its reference count and wraps
+ * it in a way that the resulting object can be copied, dumped to disk, and
+ * destroyed without affecting the refcount. When the object is retrieved from
+ * disk and recreated on internal memory, it will still hold a reference to the
+ * same memory block and can be used right away by calling the "get" method or
+ * unwrapped with the "unwrap" method to decrement the refcount.
+ *
+ * In the context of this template, a shared pointer is an object of a class P
+ * that fulfills the following requirements:
+ *
+ *   - Can be copy-constructed
+ *   - Has an assignment operator (so that the get method can be used)
+ *   - Contains a pointer to a reference count stored outside the class
+ *   - Increments the reference count on copy-construction
+ *   - Decrements the reference count on destruction
+ *
+ * Both the Boost and c++0x implementations of shared_ptr fulfill these
+ * requirements. At the moment of writing the author is not aware of any
+ * implementations of shared pointers that can't be used with this wrapper.
+ */
+template <class P>
+class external_shared_ptr
+{
+private:
+    /*!
+     * We store the pointer like this so that the refcount does not get
+     * incremented when the wrapper is copy-constructed, or decremented when
+     * the wrapper is destroyed.
+     *
+     * The whole external_shared_ptr object will be aligned by the compiler to
+     * a multiple of its size. The size of the object is sizeof(P) as the
+     * buffer is its only member. The buffer is placed in the class at offset 0
+     * so the alignment of the stored P should be alright without any
+     * additional hints.
+     */
+    char data[sizeof(P)];
+
+public:
+    /*!
+     * This constructor needs to be defined so that the [] operator in maps and
+     * hash tables works. If unwrap() or get() are called for an object
+     * constructed this way the behavior is undefined.
+     */
+    external_shared_ptr()
+    { }
+
+    /*!
+     * Copy the pointer to internal storage and increment the refcount (the
+     * destructor never gets called).
+     */
+    external_shared_ptr(P ptr)
+    {
+        new (data)P(ptr);
+    }
+
+    /*!
+     * Call the destructor to decrement the refcount. If this is called more
+     * than once the results are undefined.
+     */
+    void unwrap()
+    {
+        P* p = reinterpret_cast<P*>((void*)data);
+        p->~P();
+    }
+
+    /*!
+     * If this is called after unwrap() the behaviour is undefined.
+     */
+    P get() const
+    {
+        P* p = reinterpret_cast<P*>((void*)data);
+        return *p;
+    }
+
+    bool operator == (const external_shared_ptr& x) const
+    {
+        P* p1 = reinterpret_cast<P*>((void*)data);
+        P* p2 = reinterpret_cast<P*>((void*)x.data);
+
+        return *p1 == *p2;
+    }
+
+    //! Output contained data items
+    friend std::ostream&
+    operator << (std::ostream& os, const external_shared_ptr& p)
+    {
+        return os << p.get();
+    }
+};
+
+//! \}
+
+STXXL_END_NAMESPACE
+
+#endif // !STXXL_COMMON_EXTERNAL_SHARED_PTR_HEADER
diff --git a/include/stxxl/bits/common/is_sorted.h b/include/stxxl/bits/common/is_sorted.h
index 649b47b..65d3cfa 100644
--- a/include/stxxl/bits/common/is_sorted.h
+++ b/include/stxxl/bits/common/is_sorted.h
@@ -16,51 +16,50 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <class _ForwardIter>
-bool is_sorted_helper(_ForwardIter __first, _ForwardIter __last)
+template <class ForwardIterator>
+bool is_sorted_helper(ForwardIterator first, ForwardIterator last)
 {
-    if (__first == __last)
+    if (first == last)
         return true;
 
-    _ForwardIter __next = __first;
-    for (++__next; __next != __last; __first = __next, ++__next) {
-        if (*__next < *__first)
+    ForwardIterator next = first;
+    for (++next; next != last; first = next, ++next) {
+        if (*next < *first)
             return false;
     }
 
     return true;
 }
 
-template <class _ForwardIter, class _StrictWeakOrdering>
-bool is_sorted_helper(_ForwardIter __first, _ForwardIter __last,
-                      _StrictWeakOrdering __comp)
+template <class ForwardIterator, class StrictWeakOrdering>
+bool is_sorted_helper(ForwardIterator first, ForwardIterator last,
+                      StrictWeakOrdering comp)
 {
-    if (__first == __last)
+    if (first == last)
         return true;
 
-    _ForwardIter __next = __first;
-    for (++__next; __next != __last; __first = __next, ++__next) {
-        if (__comp(*__next, *__first))
+    ForwardIterator next = first;
+    for (++next; next != last; first = next, ++next) {
+        if (comp(*next, *first))
             return false;
     }
 
     return true;
 }
 
-template <class _ForwardIter>
-bool is_sorted(_ForwardIter __first, _ForwardIter __last)
+template <class ForwardIterator>
+bool is_sorted(ForwardIterator first, ForwardIterator last)
 {
-    return is_sorted_helper(__first, __last);
+    return is_sorted_helper(first, last);
 }
 
-template <class _ForwardIter, class _StrictWeakOrdering>
-bool is_sorted(_ForwardIter __first, _ForwardIter __last,
-               _StrictWeakOrdering __comp)
+template <class ForwardIterator, class StrictWeakOrdering>
+bool is_sorted(ForwardIterator first, ForwardIterator last,
+               StrictWeakOrdering comp)
 {
-    return is_sorted_helper(__first, __last, __comp);
+    return is_sorted_helper(first, last, comp);
 }
 
 STXXL_END_NAMESPACE
diff --git a/include/stxxl/bits/common/log.h b/include/stxxl/bits/common/log.h
index 0decbbf..c57e46d 100644
--- a/include/stxxl/bits/common/log.h
+++ b/include/stxxl/bits/common/log.h
@@ -19,7 +19,6 @@
 #include <stxxl/bits/namespace.h>
 #include <stxxl/bits/singleton.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 class logger : public singleton<logger>
diff --git a/include/stxxl/bits/common/mutex.h b/include/stxxl/bits/common/mutex.h
index e2fc68c..49c724b 100644
--- a/include/stxxl/bits/common/mutex.h
+++ b/include/stxxl/bits/common/mutex.h
@@ -31,7 +31,6 @@
  #error "Thread implementation not detected."
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 #if STXXL_STD_THREADS
diff --git a/include/stxxl/bits/common/new_alloc.h b/include/stxxl/bits/common/new_alloc.h
index 84e9d68..2d2df7f 100644
--- a/include/stxxl/bits/common/new_alloc.h
+++ b/include/stxxl/bits/common/new_alloc.h
@@ -18,44 +18,42 @@
 #include <limits>
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <class T>
+template <class Type>
 class new_alloc;
 
-template <typename T, typename U>
+template <typename Type, typename Rebind>
 struct new_alloc_rebind;
 
-template <typename T>
-struct new_alloc_rebind<T, T>{
-    typedef new_alloc<T> other;
+template <typename Type>
+struct new_alloc_rebind<Type, Type>{
+    typedef new_alloc<Type> other;
 };
 
-template <typename T, typename U>
+template <typename Type, typename Rebind>
 struct new_alloc_rebind {
-    typedef std::allocator<U> other;
+    typedef std::allocator<Rebind> other;
 };
 
-
 // designed for typed_block (to use with std::vector)
-template <class T>
+template <class Type>
 class new_alloc
 {
 public:
     // type definitions
-    typedef T value_type;
-    typedef T* pointer;
-    typedef const T* const_pointer;
-    typedef T& reference;
-    typedef const T& const_reference;
+    typedef Type value_type;
+    typedef Type* pointer;
+    typedef const Type* const_pointer;
+    typedef Type& reference;
+    typedef const Type& const_reference;
     typedef std::size_t size_type;
     typedef std::ptrdiff_t difference_type;
 
-    // rebind allocator to type U, use new_alloc only if U == T
-    template <class U>
+    // rebind allocator to type Rebind, use new_alloc only if Rebind == Type
+    template <class Rebind>
     struct rebind {
-        typedef typename new_alloc_rebind<T, U>::other other;
+        typedef typename new_alloc_rebind<Type, Rebind>::other other;
     };
 
     // return address of values
@@ -70,43 +68,43 @@ public:
 
     new_alloc() throw () { }
     new_alloc(const new_alloc&) throw () { }
-    template <class U>
-    new_alloc(const new_alloc<U>&) throw () { }
+    template <class Rebind>
+    new_alloc(const new_alloc<Rebind>&) throw () { }
     ~new_alloc() throw () { }
 
-    template <class U>
-    operator std::allocator<U>()
+    template <class Rebind>
+    operator std::allocator<Rebind>()
     {
-        static std::allocator<U> helper_allocator;
+        static std::allocator<Rebind> helper_allocator;
         return helper_allocator;
     }
 
     // return maximum number of elements that can be allocated
     size_type max_size() const throw ()
     {
-        return std::numeric_limits<size_type>::max() / sizeof(T);
+        return std::numeric_limits<size_type>::max() / sizeof(Type);
     }
 
-    // allocate but don't initialize num elements of type T
+    // allocate but don't initialize num elements of type Type
     pointer allocate(size_type num, const void* = 0)
     {
-        return static_cast<T*>(T::operator new (num * sizeof(T)));
+        return static_cast<Type*>(Type::operator new (num * sizeof(Type)));
     }
 
     // _GLIBCXX_RESOLVE_LIB_DEFECTS
     // 402. wrong new expression in [some_] allocator::construct
     // initialize elements of allocated storage p with value value
-    void construct(pointer p, const T& value)
+    void construct(pointer p, const Type& value)
     {
         // initialize memory with placement new
-        ::new ((void*)p)T(value);
+        ::new ((void*)p)Type(value);
     }
 
 #ifdef __GXX_EXPERIMENTAL_CXX0X__
     template <typename ... Args>
     void construct(pointer p, Args&& ... args)
     {
-        ::new ((void*)p)T(std::forward<Args>(args) ...);
+        ::new ((void*)p)Type(std::forward<Args>(args) ...);
     }
 #endif
 
@@ -114,27 +112,27 @@ public:
     void destroy(pointer p)
     {
         // destroy objects by calling their destructor
-        p->~T();
+        p->~Type();
     }
 
     // deallocate storage p of deleted elements
     void deallocate(pointer p, size_type /*num*/)
     {
-        T::operator delete (p);
+        Type::operator delete (p);
     }
 };
 
 // return that all specializations of this allocator are interchangeable
-template <class T1, class T2>
-inline bool operator == (const new_alloc<T1>&,
-                         const new_alloc<T2>&) throw ()
+template <class Type1, class Type2>
+inline bool operator == (const new_alloc<Type1>&,
+                         const new_alloc<Type2>&) throw ()
 {
     return true;
 }
 
-template <class T1, class T2>
-inline bool operator != (const new_alloc<T1>&,
-                         const new_alloc<T2>&) throw ()
+template <class Type1, class Type2>
+inline bool operator != (const new_alloc<Type1>&,
+                         const new_alloc<Type2>&) throw ()
 {
     return false;
 }
diff --git a/include/stxxl/bits/common/onoff_switch.h b/include/stxxl/bits/common/onoff_switch.h
index 1662213..2d87fa3 100644
--- a/include/stxxl/bits/common/onoff_switch.h
+++ b/include/stxxl/bits/common/onoff_switch.h
@@ -23,7 +23,6 @@
 #include <stxxl/bits/common/mutex.h>
 #include <stxxl/bits/common/condition_variable.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 class onoff_switch : private noncopyable
diff --git a/include/stxxl/bits/common/rand.h b/include/stxxl/bits/common/rand.h
index 61d8968..ec6fab3 100644
--- a/include/stxxl/bits/common/rand.h
+++ b/include/stxxl/bits/common/rand.h
@@ -35,11 +35,13 @@
 // 3. stxxl::srandom_number32(); // seed the global state of stxxl::random_number32
 // 4. create all the other prngs used.
 
-
 STXXL_BEGIN_NAMESPACE
 
 extern unsigned ran32State;
 
+//! \addtogroup support
+//! \{
+
 //! Fast uniform [0, 2^32) pseudo-random generator with period 2^32, random
 //! bits: 32.
 //! \warning Uses a global state and is not reentrant or thread-safe!
@@ -241,11 +243,11 @@ struct random_uniform_slow
 };
 
 //! Uniform [0, N) pseudo-random generator
-template <class UniformRGen_ = random_uniform_fast>
+template <class UniformRGen = random_uniform_fast>
 struct random_number
 {
     typedef unsigned value_type;
-    UniformRGen_ uniform;
+    UniformRGen uniform;
 
     random_number(unsigned seed = 0) : uniform(seed)
     { }
@@ -283,6 +285,8 @@ struct random_number64
 #pragma warning(pop) // assignment operator could not be generated
 #endif
 
+//! \}
+
 STXXL_END_NAMESPACE
 
 #endif // !STXXL_COMMON_RAND_HEADER
diff --git a/include/stxxl/bits/common/seed.h b/include/stxxl/bits/common/seed.h
index eb6d019..48a6f52 100644
--- a/include/stxxl/bits/common/seed.h
+++ b/include/stxxl/bits/common/seed.h
@@ -15,7 +15,6 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! set the global stxxl seed value
diff --git a/include/stxxl/bits/common/semaphore.h b/include/stxxl/bits/common/semaphore.h
index 5624e39..4010c41 100644
--- a/include/stxxl/bits/common/semaphore.h
+++ b/include/stxxl/bits/common/semaphore.h
@@ -18,7 +18,6 @@
 #include <stxxl/bits/common/mutex.h>
 #include <stxxl/bits/common/condition_variable.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 class semaphore : private noncopyable
diff --git a/include/stxxl/bits/common/settings.h b/include/stxxl/bits/common/settings.h
index 2338df1..da55ce7 100644
--- a/include/stxxl/bits/common/settings.h
+++ b/include/stxxl/bits/common/settings.h
@@ -20,18 +20,17 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <typename must_be_int = int>
+template <typename MustBeInt = int>
 class settings
 {
 public:
     static bool native_merge;
 };
 
-template <typename must_be_int>
-bool settings<must_be_int>::native_merge = true;
+template <typename MustBeInt>
+bool settings<MustBeInt>::native_merge = false;
 
 typedef settings<> SETTINGS;
 
diff --git a/include/stxxl/bits/common/simple_vector.h b/include/stxxl/bits/common/simple_vector.h
index 25dde83..c3ec901 100644
--- a/include/stxxl/bits/common/simple_vector.h
+++ b/include/stxxl/bits/common/simple_vector.h
@@ -22,9 +22,11 @@
 #include <stxxl/bits/verbose.h>
 #include <stxxl/bits/common/utils.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
+//! \addtogroup support
+//! \{
+
 /*!
  * Simpler non-growing vector without initialization.
  *
@@ -160,6 +162,9 @@ public:
         memset(m_array, 0, m_size * sizeof(value_type));
     }
 };
+
+//! \}
+
 STXXL_END_NAMESPACE
 
 namespace std {
diff --git a/include/stxxl/bits/common/state.h b/include/stxxl/bits/common/state.h
index 42979fd..a819516 100644
--- a/include/stxxl/bits/common/state.h
+++ b/include/stxxl/bits/common/state.h
@@ -19,7 +19,6 @@
 #include <stxxl/bits/common/mutex.h>
 #include <stxxl/bits/common/condition_variable.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 template <typename ValueType = int>
diff --git a/include/stxxl/bits/common/timer.h b/include/stxxl/bits/common/timer.h
index 052b986..2191b7c 100644
--- a/include/stxxl/bits/common/timer.h
+++ b/include/stxxl/bits/common/timer.h
@@ -34,9 +34,11 @@
   #include <sys/time.h>
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
+//! \addtogroup support
+//! \{
+
 //! Returns number of seconds since the epoch, high resolution.
 inline double
 timestamp()
@@ -55,7 +57,7 @@ timestamp()
 #else
     struct timeval tp;
     gettimeofday(&tp, NULL);
-    return double(tp.tv_sec) + tp.tv_usec / 1000000.;
+    return double(tp.tv_sec) + double(tp.tv_usec) / 1000000.;
 #endif
 }
 
@@ -156,17 +158,25 @@ public:
     //! on destruction: tell the time
     ~scoped_print_timer()
     {
-        if (m_bytes == 0)
-            STXXL_MSG("Finished " << m_message
-                                  << " after " << m_timer.seconds() << " seconds");
-        else
-            STXXL_MSG("Finished " << m_message
-                                  << " after " << m_timer.seconds() << " seconds. "
-                                  << "Processed " << format_IEC_size(m_bytes) << "B"
-                                  << " @ " << format_IEC_size(uint64(m_bytes / m_timer.seconds())) << "B/s");
+        if (m_bytes == 0) {
+            STXXL_MSG("Finished "
+                      << m_message
+                      << " after " << m_timer.seconds() << " seconds");
+        }
+        else {
+            double bps = (double)m_bytes / m_timer.seconds();
+
+            STXXL_MSG("Finished "
+                      << m_message
+                      << " after " << m_timer.seconds() << " seconds. "
+                      << "Processed " << format_IEC_size(m_bytes) << "B"
+                      << " @ " << format_IEC_size((uint64)bps) << "B/s");
+        }
     }
 };
 
+//! \}
+
 STXXL_END_NAMESPACE
 
 #endif // !STXXL_COMMON_TIMER_HEADER
diff --git a/include/stxxl/bits/common/tmeta.h b/include/stxxl/bits/common/tmeta.h
index d2c2ac2..81a6d32 100644
--- a/include/stxxl/bits/common/tmeta.h
+++ b/include/stxxl/bits/common/tmeta.h
@@ -20,7 +20,6 @@
 #include <stxxl/bits/namespace.h>
 #include <stxxl/bits/common/types.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! IF template metaprogramming statement.
@@ -39,7 +38,6 @@ struct IF<false, Type1, Type2>
     typedef Type2 result;
 };
 
-
 //! If \c Flag is \c true then \c IF<>::result is Num1
 //! otherwise of \c IF<>::result is Num2
 template <bool Flag, unsigned Num1, unsigned Num2>
diff --git a/include/stxxl/bits/common/tuple.h b/include/stxxl/bits/common/tuple.h
index 0bff861..56cb10b 100644
--- a/include/stxxl/bits/common/tuple.h
+++ b/include/stxxl/bits/common/tuple.h
@@ -22,7 +22,6 @@ STXXL_BEGIN_NAMESPACE
 
 struct Plug { };
 
-
 template <class T1,
           class T2,
           class T3,
@@ -55,7 +54,6 @@ struct tuple_base
     };
 };
 
-
 //! k-Tuple data type
 //!
 //! (defined for k < 7)
@@ -292,7 +290,6 @@ struct tuple<T1, T2, Plug, Plug, Plug, Plug>
     }
 };
 
-
 //! Partial specialization for 3- \c tuple (triple)
 template <class T1,
           class T2,
@@ -483,7 +480,6 @@ struct tuple<T1, T2, T3, T4, T5, Plug>
     //! Fifth tuple component type
     typedef T5 fifth_type;
 
-
     template <int I>
     struct item_type
     {
@@ -667,7 +663,6 @@ public:
     }
 };
 
-
 /**
  * Concatenates two tuple streams as streamA . streamB
  */
diff --git a/include/stxxl/bits/common/types.h b/include/stxxl/bits/common/types.h
index 6be9581..7fe461c 100644
--- a/include/stxxl/bits/common/types.h
+++ b/include/stxxl/bits/common/types.h
@@ -17,10 +17,8 @@
 #include <stxxl/bits/config.h>
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-
 #if STXXL_MSVC
 typedef __int8 int8;
 typedef unsigned __int8 uint8;
@@ -41,7 +39,6 @@ typedef long long int int64;
 typedef unsigned long long int uint64;
 #endif
 
-
 // integer types declarations
 enum { my_pointer_size = sizeof(void*) };
 
diff --git a/include/stxxl/bits/common/uint_types.h b/include/stxxl/bits/common/uint_types.h
index a031163..fc7b078 100644
--- a/include/stxxl/bits/common/uint_types.h
+++ b/include/stxxl/bits/common/uint_types.h
@@ -28,8 +28,7 @@
 STXXL_BEGIN_NAMESPACE
 
 /*!
- * \brief Construct an 40-bit or 48-bit unsigned integer stored in five or six
- * bytes.
+ * Construct a 40-bit or 48-bit unsigned integer stored in five or six bytes.
  *
  * The purpose of this class is to provide integers with smaller data storage
  * footprints when more than 32-bit, but less than 64-bit indexes are
@@ -65,29 +64,29 @@ private:
     high_type high;
 
     //! return highest value storable in lower part, also used as a mask.
-    static low_type low_max()
+    static unsigned_type low_max()
     {
         return std::numeric_limits<low_type>::max();
     }
 
     //! number of bits in the lower integer part, used a bit shift value.
-    static const int low_bits = 8 * sizeof(low_type);
+    static const size_t low_bits = 8 * sizeof(low_type);
 
     //! return highest value storable in higher part, also used as a mask.
-    static high_type high_max()
+    static unsigned_type high_max()
     {
         return std::numeric_limits<high_type>::max();
     }
 
     //! number of bits in the higher integer part, used a bit shift value.
-    static const int high_bits = 8 * sizeof(high_type);
+    static const size_t high_bits = 8 * sizeof(high_type);
 
 public:
     //! number of binary digits (bits) in uint_pair
-    static const int digits = low_bits + high_bits;
+    static const size_t digits = low_bits + high_bits;
 
     //! number of bytes in uint_pair
-    static const int bytes = sizeof(low_type) + sizeof(high_type);
+    static const size_t bytes = sizeof(low_type) + sizeof(high_type);
 
     //! empty constructor, does not even initialize to zero!
     inline uint_pair()
@@ -123,13 +122,13 @@ public:
         if (a >= 0)
             low = a;
         else
-            low = a, high = high_max();
+            low = a, high = (high_type)high_max();
     }
 
     //! construct from an uint64 (unsigned long long)
     inline uint_pair(const uint64& a)
-        : low(a & low_max()),
-          high((a >> low_bits) & high_max())
+        : low((low_type)(a & low_max())),
+          high((high_type)((a >> low_bits) & high_max()))
     {
         // check for overflow
         assert((a >> (low_bits + high_bits)) == 0);
@@ -167,7 +166,7 @@ public:
     inline uint_pair& operator -- ()
     {
         if (UNLIKELY(low == 0))
-            --high, low = low_max();
+            --high, low = (low_type)low_max();
         else
             --low;
         return *this;
@@ -177,8 +176,8 @@ public:
     inline uint_pair& operator += (const uint_pair& b)
     {
         uint64 add = low + b.low;
-        low = add & low_max();
-        high += b.high + ((add >> low_bits) & high_max());
+        low = (low_type)(add & low_max());
+        high = (high_type)(high + b.high + ((add >> low_bits) & high_max()));
         return *this;
     }
 
@@ -245,12 +244,17 @@ public:
 __attribute__ ((packed));
 #endif
 
+//! \addtogroup support
+//! \{
+
 //! Construct a 40-bit unsigned integer stored in five bytes.
 typedef uint_pair<uint8> uint40;
 
 //! Construct a 48-bit unsigned integer stored in six bytes.
 typedef uint_pair<uint16> uint48;
 
+//! \}
+
 STXXL_END_NAMESPACE
 
 namespace std {
diff --git a/include/stxxl/bits/common/utils.h b/include/stxxl/bits/common/utils.h
index 9866c94..fbc024a 100644
--- a/include/stxxl/bits/common/utils.h
+++ b/include/stxxl/bits/common/utils.h
@@ -30,7 +30,6 @@
 #include <stxxl/bits/compat/type_traits.h>
 #include <stxxl/bits/msvc_compatibility.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 ////////////////////////////////////////////////////////////////////////////
@@ -142,18 +141,18 @@ inline stxxl::uint64 atouint64(const char* s)
 
 ////////////////////////////////////////////////////////////////////////////
 
-template <typename Tp>
-inline const Tp&
-STXXL_MIN(const Tp& a, const Tp& b)
+template <typename Type>
+inline const Type&
+STXXL_MIN(const Type& a, const Type& b)
 {
-    return std::min<Tp>(a, b);
+    return std::min<Type>(a, b);
 }
 
-template <typename Tp>
-inline const Tp&
-STXXL_MAX(const Tp& a, const Tp& b)
+template <typename Type>
+inline const Type&
+STXXL_MAX(const Type& a, const Type& b)
 {
-    return std::max<Tp>(a, b);
+    return std::max<Type>(a, b);
 }
 
 ////////////////////////////////////////////////////////////////////////////
@@ -197,14 +196,14 @@ unsigned int ilog2_ceil(const IntegerType& i)
 template <typename Integral, typename Integral2>
 inline
 typename compat::remove_const<Integral>::type
-div_ceil(Integral __n, Integral2 __d)
+div_ceil(Integral n, Integral2 d)
 {
 #if 0  // ambiguous overload for std::div(unsigned_anything, unsigned_anything)
-    typedef __typeof__ (std::div(__n, __d)) div_type;
-    div_type result = std::div(__n, __d);
+    typedef __typeof__ (std::div(n, d)) div_type;
+    div_type result = std::div(n, d);
     return result.quot + (result.rem != 0);
 #else
-    return __n / __d + ((__n % __d) != 0);
+    return n / d + ((n % d) != 0);
 #endif
 }
 
@@ -228,7 +227,7 @@ div_ceil(Integral __n, Integral2 __d)
 
 ////////////////////////////////////////////////////////////////////////////
 
-inline uint64 longhash1(uint64 key_)
+inline size_t longhash1(uint64 key_)
 {
     key_ += ~(key_ << 32);
     key_ ^= (key_ >> 22);
@@ -238,13 +237,13 @@ inline uint64 longhash1(uint64 key_)
     key_ ^= (key_ >> 15);
     key_ += ~(key_ << 27);
     key_ ^= (key_ >> 31);
-    return key_;
+    return (size_t)key_;
 }
 
 ////////////////////////////////////////////////////////////////////////////
 
-template <class T>
-inline void swap_1D_arrays(T* a, T* b, unsigned_type size)
+template <class Type>
+inline void swap_1D_arrays(Type* a, Type* b, unsigned_type size)
 {
     for (unsigned_type i = 0; i < size; ++i)
         std::swap(a[i], b[i]);
diff --git a/include/stxxl/bits/compat/hash_map.h b/include/stxxl/bits/compat/hash_map.h
index d955b31..fd236fa 100644
--- a/include/stxxl/bits/compat/hash_map.h
+++ b/include/stxxl/bits/compat/hash_map.h
@@ -7,6 +7,7 @@
  *
  *  Copyright (C) 2008, 2010, 2011 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
  *  Copyright (C) 2009, 2010 Johannes Singler <singler at kit.edu>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -19,7 +20,7 @@
 #include <stxxl/bits/config.h>
 #include <stxxl/bits/namespace.h>
 
-#if defined(__GXX_EXPERIMENTAL_CXX0X__)
+#if __cplusplus >= 201103L
  #include <unordered_map>
 #elif STXXL_MSVC
  #include <hash_map>
@@ -32,31 +33,47 @@
 
 STXXL_BEGIN_NAMESPACE
 
-template <class _Tp>
+template <class KeyType>
 struct compat_hash {
-#if defined(__GXX_EXPERIMENTAL_CXX0X__)
-    typedef std::hash<_Tp> result;
+#if __cplusplus >= 201103L
+    typedef std::hash<KeyType> result;
 #elif STXXL_MSVC
-    typedef stdext::hash_compare<_Tp> result;
+    typedef stdext::hash_compare<KeyType> result;
 #elif defined(__GNUG__) && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40200) && \
     (!defined(__ICC) || (__ICC > 1110))
-    typedef std::tr1::hash<_Tp> result;
+    typedef std::tr1::hash<KeyType> result;
 #else
-    typedef __gnu_cxx::hash<_Tp> result;
+    typedef __gnu_cxx::hash<KeyType> result;
 #endif
 };
 
-template <class _Key, class _Tp, class _Hash = typename compat_hash<_Key>::result>
+template <class KeyType, class MappedType,
+          class HashType = typename compat_hash<KeyType>::result>
 struct compat_hash_map {
-#if defined(__GXX_EXPERIMENTAL_CXX0X__)
-    typedef std::unordered_map<_Key, _Tp, _Hash> result;
+#if __cplusplus >= 201103L
+    typedef std::unordered_map<KeyType, MappedType, HashType> result;
 #elif STXXL_MSVC
-    typedef stdext::hash_map<_Key, _Tp, _Hash> result;
+    typedef stdext::hash_map<KeyType, MappedType, HashType> result;
 #elif defined(__GNUG__) && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40200) && \
     (!defined(__ICC) || (__ICC > 1110))
-    typedef std::tr1::unordered_map<_Key, _Tp, _Hash> result;
+    typedef std::tr1::unordered_map<KeyType, MappedType, HashType> result;
 #else
-    typedef __gnu_cxx::hash_map<_Key, _Tp, _Hash> result;
+    typedef __gnu_cxx::hash_map<KeyType, MappedType, HashType> result;
+#endif
+};
+
+template <class KeyType, class MappedType,
+          class HashType = typename compat_hash<KeyType>::result>
+struct compat_hash_multimap {
+#if __cplusplus >= 201103L
+    typedef std::unordered_multimap<KeyType, MappedType, HashType> result;
+#elif STXXL_MSVC
+    typedef stdext::hash_multimap<KeyType, MappedType, HashType> result;
+#elif defined(__GNUG__) && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40200) && \
+    (!defined(__ICC) || (__ICC > 1110))
+    typedef std::tr1::unordered_multimap<KeyType, MappedType, HashType> result;
+#else
+    typedef __gnu_cxx::hash_multimap<KeyType, MappedType, HashType> result;
 #endif
 };
 
diff --git a/include/stxxl/bits/compat/type_traits.h b/include/stxxl/bits/compat/type_traits.h
index 08a519e..5dfa8a7 100644
--- a/include/stxxl/bits/compat/type_traits.h
+++ b/include/stxxl/bits/compat/type_traits.h
@@ -16,7 +16,7 @@
 #include <stxxl/bits/config.h>
 #include <stxxl/bits/namespace.h>
 
-#if defined(__GXX_EXPERIMENTAL_CXX0X__)
+#if __cplusplus >= 201103L
 #include <type_traits>
 #elif defined(__GNUG__) && (__GNUC__ >= 4)
 #include <tr1/type_traits>
@@ -24,28 +24,27 @@
 #include <boost/type_traits/remove_const.hpp>
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace compat {
 
-#if defined(__GXX_EXPERIMENTAL_CXX0X__)
+#if __cplusplus >= 201103L
 using std::remove_const;
 #elif defined(__GNUG__) && (__GNUC__ >= 4)
 using std::tr1::remove_const;
 #elif STXXL_BOOST_CONFIG
 using boost::remove_const;
 #else
-template <typename _Tp>
+template <typename Type>
 struct remove_const
 {
-    typedef _Tp type;
+    typedef Type type;
 };
 
-template <typename _Tp>
-struct remove_const<_Tp const>
+template <typename Type>
+struct remove_const<Type const>
 {
-    typedef _Tp type;
+    typedef Type type;
 };
 #endif
 
@@ -53,53 +52,52 @@ struct remove_const<_Tp const>
 // That is a small subset of what GCC 4.3 does:
 
 // Utility for finding the signed versions of unsigned integral types.
-template <typename _Tp>
-struct __make_signed
+template <typename Type>
+struct _make_signed
 {
-    typedef _Tp __type;
+    typedef Type type;
 };
 
 template <>
-struct __make_signed<char>
+struct _make_signed<char>
 {
-    typedef signed char __type;
+    typedef signed char type;
 };
 
 template <>
-struct __make_signed<unsigned char>
+struct _make_signed<unsigned char>
 {
-    typedef signed char __type;
+    typedef signed char type;
 };
 
 template <>
-struct __make_signed<unsigned short>
+struct _make_signed<unsigned short>
 {
-    typedef signed short __type;
+    typedef signed short type;
 };
 
 template <>
-struct __make_signed<unsigned int>
+struct _make_signed<unsigned int>
 {
-    typedef signed int __type;
+    typedef signed int type;
 };
 
 template <>
-struct __make_signed<unsigned long>
+struct _make_signed<unsigned long>
 {
-    typedef signed long __type;
+    typedef signed long type;
 };
 
 template <>
-struct __make_signed<unsigned long long>
+struct _make_signed<unsigned long long>
 {
-    typedef signed long long __type;
+    typedef signed long long type;
 };
 
-
-template <typename _Tp>
+template <typename Type>
 struct make_signed
 {
-    typedef typename __make_signed<_Tp>::__type type;
+    typedef typename _make_signed<Type>::type type;
 };
 #endif
 
diff --git a/include/stxxl/bits/compat/unique_ptr.h b/include/stxxl/bits/compat/unique_ptr.h
index 7554a0a..9df12ff 100644
--- a/include/stxxl/bits/compat/unique_ptr.h
+++ b/include/stxxl/bits/compat/unique_ptr.h
@@ -15,20 +15,18 @@
 #ifndef STXXL_COMPAT_UNIQUE_PTR_HEADER
 #define STXXL_COMPAT_UNIQUE_PTR_HEADER
 
-
 #include <memory>
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-template <class _Tp>
+template <class Type>
 struct compat_unique_ptr {
-#if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40400)
-    typedef std::unique_ptr<_Tp> result;
+#if __cplusplus >= 201103L && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40400)
+    typedef std::unique_ptr<Type> result;
 #else
     // auto_ptr is inherently broken and is deprecated by unique_ptr in c++0x
-    typedef std::auto_ptr<_Tp> result;
+    typedef std::auto_ptr<Type> result;
 #endif
 };
 
@@ -38,17 +36,17 @@ STXXL_END_NAMESPACE
 
 namespace workaround_gcc_3_4 {
 
-// std::swap in gcc 3.4 is broken, __tmp is declared const there
-template <typename _Tp>
+// std::swap in gcc 3.4 is broken, tmp is declared const there
+template <typename Type>
 inline void
-swap(_Tp& __a, _Tp& __b)
+swap(Type& a, Type& b)
 {
     // concept requirements
-    __glibcxx_function_requires(_SGIAssignableConcept<_Tp>)
+    __glibcxx_function_requires(_SGIAssignableConcept<Type>)
 
-    _Tp __tmp = __a;
-    __a = __b;
-    __b = __tmp;
+    Type tmp = a;
+    a = b;
+    b = tmp;
 }
 
 } // namespace workaround_gcc_3_4
@@ -56,8 +54,8 @@ swap(_Tp& __a, _Tp& __b)
 namespace std {
 
 // overload broken std::swap<auto_ptr> to call a working swap()
-template <typename _Tp>
-inline void swap(std::auto_ptr<_Tp>& a, std::auto_ptr<_Tp>& b)
+template <typename Type>
+inline void swap(std::auto_ptr<Type>& a, std::auto_ptr<Type>& b)
 {
     workaround_gcc_3_4::swap(a, b);
 }
diff --git a/include/stxxl/bits/config.h.in b/include/stxxl/bits/config.h.in
index 72de57c..db3df3f 100644
--- a/include/stxxl/bits/config.h.in
+++ b/include/stxxl/bits/config.h.in
@@ -38,6 +38,11 @@
 // used in: io/mmap_file.h/cpp
 // effect:  enables/disables memory mapped file implementation
 
+#cmakedefine STXXL_HAVE_LINUXAIO_FILE ${STXXL_HAVE_LINUXAIO_FILE}
+// default: 0/1 (platform dependent)
+// used in: io/linuxaio_file.h/cpp
+// effect:  enables/disables Linux AIO file implementation
+
 #cmakedefine STXXL_POSIX_THREADS ${STXXL_POSIX_THREADS}
 // default: off
 // cmake:   detection of pthreads by cmake
diff --git a/include/stxxl/bits/containers/btree/btree.h b/include/stxxl/bits/containers/btree/btree.h
index ed58f33..fb14497 100644
--- a/include/stxxl/bits/containers/btree/btree.h
+++ b/include/stxxl/bits/containers/btree/btree.h
@@ -23,7 +23,6 @@
 #include <stxxl/bits/containers/btree/node.h>
 #include <stxxl/vector>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace btree {
@@ -42,7 +41,8 @@ public:
     typedef DataType data_type;
     typedef CompareType key_compare;
 
-    typedef btree<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy> SelfType;
+    typedef btree<KeyType, DataType, CompareType,
+                  RawNodeSize, RawLeafSize, PDAllocStrategy> self_type;
 
     typedef PDAllocStrategy alloc_strategy_type;
 
@@ -54,27 +54,26 @@ public:
     typedef value_type* pointer;
     typedef value_type const* const_pointer;
 
-
     // leaf type declarations
-    typedef normal_leaf<key_type, data_type, key_compare, RawLeafSize, SelfType> leaf_type;
-    friend class normal_leaf<key_type, data_type, key_compare, RawLeafSize, SelfType>;
+    typedef normal_leaf<key_type, data_type, key_compare, RawLeafSize, self_type> leaf_type;
+    friend class normal_leaf<key_type, data_type, key_compare, RawLeafSize, self_type>;
     typedef typename leaf_type::block_type leaf_block_type;
     typedef typename leaf_type::bid_type leaf_bid_type;
-    typedef node_cache<leaf_type, SelfType> leaf_cache_type;
-    friend class node_cache<leaf_type, SelfType>;
+    typedef node_cache<leaf_type, self_type> leaf_cache_type;
+    friend class node_cache<leaf_type, self_type>;
     // iterator types
-    typedef btree_iterator<SelfType> iterator;
-    typedef btree_const_iterator<SelfType> const_iterator;
-    friend class btree_iterator_base<SelfType>;
+    typedef btree_iterator<self_type> iterator;
+    typedef btree_const_iterator<self_type> const_iterator;
+    friend class btree_iterator_base<self_type>;
     // iterator map type
-    typedef iterator_map<SelfType> iterator_map_type;
+    typedef iterator_map<self_type> iterator_map_type;
     // node type declarations
-    typedef normal_node<key_type, key_compare, RawNodeSize, SelfType> node_type;
+    typedef normal_node<key_type, key_compare, RawNodeSize, self_type> node_type;
     typedef typename node_type::block_type node_block_type;
-    friend class normal_node<key_type, key_compare, RawNodeSize, SelfType>;
+    friend class normal_node<key_type, key_compare, RawNodeSize, self_type>;
     typedef typename node_type::bid_type node_bid_type;
-    typedef node_cache<node_type, SelfType> node_cache_type;
-    friend class node_cache<node_type, SelfType>;
+    typedef node_cache<node_type, self_type> node_cache_type;
+    friend class node_cache<node_type, self_type>;
 
     typedef typename leaf_type::value_compare value_compare;
 
@@ -86,47 +85,46 @@ public:
     };
 
 private:
-    key_compare key_compare_;
-    mutable node_cache_type node_cache_;
-    mutable leaf_cache_type leaf_cache_;
-    iterator_map_type iterator_map_;
-    size_type size_;
-    unsigned int height_;
-    bool prefetching_enabled_;
-    block_manager* bm_;
-    alloc_strategy_type alloc_strategy_;
+    key_compare m_key_compare;
+    mutable node_cache_type m_node_cache;
+    mutable leaf_cache_type m_leaf_cache;
+    iterator_map_type m_iterator_map;
+    size_type m_size;
+    unsigned int m_height;
+    bool m_prefetching_enabled;
+    block_manager* m_bm;
+    alloc_strategy_type m_alloc_strategy;
 
     typedef std::map<key_type, node_bid_type, key_compare> root_node_type;
     typedef typename root_node_type::iterator root_node_iterator_type;
     typedef typename root_node_type::const_iterator root_node_const_iterator_type;
     typedef std::pair<key_type, node_bid_type> root_node_pair_type;
 
-
-    root_node_type root_node_;
-    iterator end_iterator;
-
+    root_node_type m_root_node;
+    iterator m_end_iterator;
 
     void insert_into_root(const std::pair<key_type, node_bid_type>& splitter)
     {
         std::pair<root_node_iterator_type, bool> result =
-            root_node_.insert(splitter);
+            m_root_node.insert(splitter);
         STXXL_ASSERT(result.second == true);
-        if (root_node_.size() > max_node_size)          // root overflow
+
+        if (m_root_node.size() > max_node_size) // root overflow
         {
             STXXL_VERBOSE1("btree::insert_into_root, overflow happened, splitting");
 
-            node_bid_type LeftBid;
-            node_type* LeftNode = node_cache_.get_new_node(LeftBid);
-            assert(LeftNode);
-            node_bid_type RightBid;
-            node_type* RightNode = node_cache_.get_new_node(RightBid);
-            assert(RightNode);
+            node_bid_type left_bid;
+            node_type* left_node = m_node_cache.get_new_node(left_bid);
+            assert(left_node);
+            node_bid_type right_bid;
+            node_type* right_node = m_node_cache.get_new_node(right_bid);
+            assert(right_node);
 
-            const unsigned_type old_size = root_node_.size();
-            const unsigned_type half = root_node_.size() / 2;
+            const unsigned_type old_size = m_root_node.size();
+            const unsigned_type half = m_root_node.size() / 2;
             unsigned_type i = 0;
-            root_node_iterator_type it = root_node_.begin();
-            typename node_block_type::iterator block_it = LeftNode->block().begin();
+            root_node_iterator_type it = m_root_node.begin();
+            typename node_block_type::iterator block_it = left_node->block().begin();
             while (i < half)                    // copy smaller part
             {
                 *block_it = *it;
@@ -134,10 +132,10 @@ private:
                 ++block_it;
                 ++it;
             }
-            LeftNode->block().info.cur_size = (unsigned int)half;
-            key_type LeftKey = (LeftNode->block()[half - 1]).first;
+            left_node->block().info.cur_size = (unsigned int)half;
+            key_type left_key = (left_node->block()[half - 1]).first;
 
-            block_it = RightNode->block().begin();
+            block_it = right_node->block().begin();
             while (i < old_size)                // copy larger part
             {
                 *block_it = *it;
@@ -145,261 +143,302 @@ private:
                 ++block_it;
                 ++it;
             }
-            unsigned_type right_size = RightNode->block().info.cur_size = (unsigned int)(old_size - half);
-            key_type RightKey = (RightNode->block()[right_size - 1]).first;
+            unsigned_type right_size = right_node->block().info.cur_size = (unsigned int)(old_size - half);
+            key_type right_key = (right_node->block()[right_size - 1]).first;
 
-            assert(old_size == RightNode->size() + LeftNode->size());
+            assert(old_size == right_node->size() + left_node->size());
 
             // create new root node
-            root_node_.clear();
-            root_node_.insert(root_node_pair_type(LeftKey, LeftBid));
-            root_node_.insert(root_node_pair_type(RightKey, RightBid));
-
+            m_root_node.clear();
+            m_root_node.insert(root_node_pair_type(left_key, left_bid));
+            m_root_node.insert(root_node_pair_type(right_key, right_bid));
 
-            ++height_;
-            STXXL_VERBOSE1("btree Increasing height to " << height_);
-            if (node_cache_.size() < (height_ - 1))
+            ++m_height;
+            STXXL_VERBOSE1("btree Increasing height to " << m_height);
+            if (m_node_cache.size() < (m_height - 1))
             {
-                STXXL_THROW2(std::runtime_error, "btree::bulk_construction", "The height of the tree (" << height_ << ") has exceeded the required capacity (" << (node_cache_.size() + 1) << ") of the node cache. Increase the node cache size.");
+                STXXL_THROW2(std::runtime_error, "btree::bulk_construction",
+                             "The height of the tree (" << m_height << ") has exceeded the required capacity (" << (m_node_cache.size() + 1) << ") of the node cache. Increase the node cache size.");
             }
         }
     }
 
     template <class CacheType>
-    void fuse_or_balance(root_node_iterator_type UIt, CacheType& cache_)
+    void fuse_or_balance(root_node_iterator_type uit, CacheType& cache)
     {
         typedef typename CacheType::node_type local_node_type;
         typedef typename local_node_type::bid_type local_bid_type;
 
-        root_node_iterator_type leftIt, rightIt;
-        if (UIt->first == key_compare::max_value())             // UIt is the last entry in the root
+        root_node_iterator_type left_it, right_it;
+        if (uit->first == key_compare::max_value())
         {
-            assert(UIt != root_node_.begin());
-            rightIt = UIt;
-            leftIt = --UIt;
+            // uit is the last entry in the root
+            assert(uit != m_root_node.begin());
+            right_it = uit;
+            left_it = --uit;
         }
         else
         {
-            leftIt = UIt;
-            rightIt = ++UIt;
-            assert(rightIt != root_node_.end());
+            left_it = uit;
+            right_it = ++uit;
+            assert(right_it != m_root_node.end());
         }
 
         // now fuse or balance nodes pointed by leftIt and rightIt
-        local_bid_type LeftBid = (local_bid_type)leftIt->second;
-        local_bid_type RightBid = (local_bid_type)rightIt->second;
-        local_node_type* LeftNode = cache_.get_node(LeftBid, true);
-        local_node_type* RightNode = cache_.get_node(RightBid, true);
+        local_bid_type left_bid = (local_bid_type)left_it->second;
+        local_bid_type right_bid = (local_bid_type)right_it->second;
+        local_node_type* left_node = cache.get_node(left_bid, true);
+        local_node_type* right_node = cache.get_node(right_bid, true);
 
-        const unsigned_type TotalSize = LeftNode->size() + RightNode->size();
-        if (TotalSize <= RightNode->max_nelements())
+        const unsigned_type total_size = left_node->size() + right_node->size();
+        if (total_size <= right_node->max_nelements())
         {
-            // fuse
-            RightNode->fuse(*LeftNode);                 // add the content of LeftNode to RightNode
+            // --- fuse ---
 
-            cache_.unfix_node(RightBid);
-            cache_.delete_node(LeftBid);                // 'delete_node' unfixes LeftBid also
+            // add the content of left_node to right_node
+            right_node->fuse(*left_node);
 
-            root_node_.erase(leftIt);                   // delete left BID from the root
+            cache.unfix_node(right_bid);
+            // 'delete_node' unfixes left_bid also
+            cache.delete_node(left_bid);
+
+            // delete left BID from the root
+            m_root_node.erase(left_it);
         }
         else
         {
-            // balance
+            // --- balance ---
+
+            key_type new_splitter = right_node->balance(*left_node);
 
-            key_type NewSplitter = RightNode->balance(*LeftNode);
+            // delete left BID from the root
+            m_root_node.erase(left_it);
 
-            root_node_.erase(leftIt);                   // delete left BID from the root
             // reinsert with the new key
-            root_node_.insert(root_node_pair_type(NewSplitter, (node_bid_type)LeftBid));
+            m_root_node.insert(root_node_pair_type(new_splitter, (node_bid_type)left_bid));
 
-            cache_.unfix_node(LeftBid);
-            cache_.unfix_node(RightBid);
+            cache.unfix_node(left_bid);
+            cache.unfix_node(right_bid);
         }
     }
 
     void create_empty_leaf()
     {
-        leaf_bid_type NewBid;
-        leaf_type* NewLeaf = leaf_cache_.get_new_node(NewBid);
-        assert(NewLeaf);
-        end_iterator = NewLeaf->end();                  // initialize end() iterator
-        root_node_.insert(root_node_pair_type(key_compare::max_value(), (node_bid_type)NewBid));
+        leaf_bid_type new_bid;
+        leaf_type* new_leaf = m_leaf_cache.get_new_node(new_bid);
+        assert(new_leaf);
+        m_end_iterator = new_leaf->end();           // initialize end() iterator
+        m_root_node.insert(
+            root_node_pair_type(key_compare::max_value(), (node_bid_type)new_bid)
+            );
     }
 
     void deallocate_children()
     {
-        if (height_ == 2)
+        if (m_height == 2)
         {
             // we have children leaves here
-            root_node_const_iterator_type it = root_node_.begin();
-            for ( ; it != root_node_.end(); ++it)
+            for (root_node_const_iterator_type it = m_root_node.begin();
+                 it != m_root_node.end(); ++it)
             {
                 // delete from leaf cache and deallocate bid
-                leaf_cache_.delete_node((leaf_bid_type)it->second);
+                m_leaf_cache.delete_node((leaf_bid_type)it->second);
             }
         }
         else
         {
-            root_node_const_iterator_type it = root_node_.begin();
-            for ( ; it != root_node_.end(); ++it)
+            for (root_node_const_iterator_type it = m_root_node.begin();
+                 it != m_root_node.end(); ++it)
             {
-                node_type* Node = node_cache_.get_node((node_bid_type)it->second);
-                assert(Node);
-                Node->deallocate_children(height_ - 1);
+                node_type* node = m_node_cache.get_node((node_bid_type)it->second);
+                assert(node);
+                node->deallocate_children(m_height - 1);
                 // delete from node cache and deallocate bid
-                node_cache_.delete_node((node_bid_type)it->second);
+                m_node_cache.delete_node((node_bid_type)it->second);
             }
         }
     }
 
     template <class InputIterator>
-    void bulk_construction(InputIterator b, InputIterator e, double node_fill_factor, double leaf_fill_factor)
+    void bulk_construction(InputIterator begin, InputIterator end,
+                           double node_fill_factor, double leaf_fill_factor)
     {
         assert(node_fill_factor >= 0.5);
         assert(leaf_fill_factor >= 0.5);
-        key_type lastKey = key_compare::max_value();
+        key_type last_key = key_compare::max_value();
 
         typedef std::pair<key_type, node_bid_type> key_bid_pair;
-        typedef typename stxxl::VECTOR_GENERATOR<key_bid_pair, 1, 1,
-                                                 node_block_type::raw_size>::result key_bid_vector_type;
+        typedef typename stxxl::VECTOR_GENERATOR<
+                key_bid_pair, 1, 1, node_block_type::raw_size
+                >::result key_bid_vector_type;
 
-        key_bid_vector_type Bids;
+        key_bid_vector_type bids;
 
-        leaf_bid_type NewBid;
-        leaf_type* Leaf = leaf_cache_.get_new_node(NewBid);
-        const unsigned_type max_leaf_elements = unsigned_type(double(Leaf->max_nelements()) * leaf_fill_factor);
+        leaf_bid_type new_bid;
+        leaf_type* leaf = m_leaf_cache.get_new_node(new_bid);
+        const unsigned_type max_leaf_elements = unsigned_type(
+            (double)leaf->max_nelements() * leaf_fill_factor
+            );
 
-        while (b != e)
+        while (begin != end)
         {
             // write data in leaves
 
             // if *b not equal to the last element
-            if (key_compare_(b->first, lastKey) || key_compare_(lastKey, b->first))
+            if (m_key_compare(begin->first, last_key) || m_key_compare(last_key, begin->first))
             {
-                ++size_;
-                if (Leaf->size() == max_leaf_elements)
+                ++m_size;
+                if (leaf->size() == max_leaf_elements)
                 {
                     // overflow, need a new block
-                    Bids.push_back(key_bid_pair(Leaf->back().first, (node_bid_type)NewBid));
+                    bids.push_back(key_bid_pair(leaf->back().first, (node_bid_type)new_bid));
 
-                    leaf_type* NewLeaf = leaf_cache_.get_new_node(NewBid);
-                    assert(NewLeaf);
+                    leaf_type* new_leaf = m_leaf_cache.get_new_node(new_bid);
+                    assert(new_leaf);
                     // Setting links
-                    Leaf->succ() = NewLeaf->my_bid();
-                    NewLeaf->pred() = Leaf->my_bid();
+                    leaf->succ() = new_leaf->my_bid();
+                    new_leaf->pred() = leaf->my_bid();
 
-                    Leaf = NewLeaf;
+                    leaf = new_leaf;
                 }
-                Leaf->push_back(*b);
-                lastKey = b->first;
+                leaf->push_back(*begin);
+                last_key = begin->first;
             }
-            ++b;
+            ++begin;
         }
 
         // rebalance the last leaf
-        if (Leaf->underflows() && !Bids.empty())
+        if (leaf->underflows() && !bids.empty())
         {
-            leaf_type* LeftLeaf = leaf_cache_.get_node((leaf_bid_type)(Bids.back().second));
-            assert(LeftLeaf);
-            if (LeftLeaf->size() + Leaf->size() <= Leaf->max_nelements())     // can fuse
+            leaf_type* left_leaf = m_leaf_cache.get_node((leaf_bid_type)(bids.back().second));
+            assert(left_leaf);
+            if (left_leaf->size() + leaf->size() <= leaf->max_nelements())
             {
-                Leaf->fuse(*LeftLeaf);
-                leaf_cache_.delete_node((leaf_bid_type)(Bids.back().second));
-                Bids.pop_back();
-                assert(!Leaf->overflows() && !Leaf->underflows());
+                // can fuse
+                leaf->fuse(*left_leaf);
+                m_leaf_cache.delete_node((leaf_bid_type)(bids.back().second));
+                bids.pop_back();
+                assert(!leaf->overflows() && !leaf->underflows());
             }
             else
             {
                 // need to rebalance
-                const key_type NewSplitter = Leaf->balance(*LeftLeaf);
-                Bids.back().first = NewSplitter;
-                assert(!LeftLeaf->overflows() && !LeftLeaf->underflows());
+                const key_type new_splitter = leaf->balance(*left_leaf);
+                bids.back().first = new_splitter;
+                assert(!left_leaf->overflows() && !left_leaf->underflows());
             }
         }
 
-        assert(!Leaf->overflows() && (!Leaf->underflows() || size_ <= max_leaf_size));
+        assert(!leaf->overflows() && (!leaf->underflows() || m_size <= max_leaf_size));
 
-        end_iterator = Leaf->end();                 // initialize end() iterator
+        m_end_iterator = leaf->end();                 // initialize end() iterator
 
-        Bids.push_back(key_bid_pair(key_compare::max_value(), (node_bid_type)NewBid));
+        bids.push_back(key_bid_pair(key_compare::max_value(), (node_bid_type)new_bid));
 
-        const unsigned_type max_node_elements = unsigned_type(double(max_node_size) * node_fill_factor);
+        const unsigned_type max_node_elements = unsigned_type(
+            double(max_node_size) * node_fill_factor
+            );
 
-        while (Bids.size() > max_node_elements)
+        //-tb fixes bug with only one child remaining in m_root_node
+        while (bids.size() > node_type::max_nelements())
         {
-            key_bid_vector_type ParentBids;
+            key_bid_vector_type parent_bids;
 
-            stxxl::uint64 nparents = div_ceil(Bids.size(), max_node_elements);
+            stxxl::uint64 nparents = div_ceil(bids.size(), max_node_elements);
             assert(nparents >= 2);
-            STXXL_VERBOSE1("btree bulk constructBids.size() " << Bids.size() << " nparents: " << nparents << " max_ns: "
-                                                              << max_node_elements);
-            STXXL_UNUSED(nparents);
-            typename key_bid_vector_type::const_iterator it = Bids.begin();
-
-            do
+            STXXL_VERBOSE1("btree bulk construct"
+                           << " bids.size=" << bids.size()
+                           << " nparents=" << nparents
+                           << " max_node_elements=" << max_node_elements
+                           << " node_type::max_nelements=" << node_type::max_nelements());
+
+            for (typename key_bid_vector_type::const_iterator it = bids.begin();
+                 it != bids.end(); )
             {
-                node_bid_type NewBid;
-                node_type* Node = node_cache_.get_new_node(NewBid);
-                assert(Node);
-                unsigned_type cnt = 0;
-                for ( ; cnt < max_node_elements && it != Bids.end(); ++cnt, ++it)
+                node_bid_type new_bid;
+                node_type* node = m_node_cache.get_new_node(new_bid);
+                assert(node);
+
+                for (unsigned_type cnt = 0;
+                     cnt < max_node_elements && it != bids.end(); ++cnt, ++it)
                 {
-                    Node->push_back(*it);
+                    node->push_back(*it);
                 }
-                STXXL_VERBOSE1("btree bulk construct Node size : " << Node->size() << " limits: " <<
-                               Node->min_nelements() << " " << Node->max_nelements() << " max_node_elements: " << max_node_elements);
 
-                if (Node->underflows())
+                STXXL_VERBOSE1("btree bulk construct node size : " << node->size() << " limits: " << node->min_nelements() << " " << node->max_nelements() << " max_node_elements: " << max_node_elements);
+
+                if (node->underflows())
                 {
-                    assert(it == Bids.end());                           // this can happen only at the end
-                    assert(!ParentBids.empty());
+                    // this can happen only at the end
+                    assert(it == bids.end());
+                    assert(!parent_bids.empty());
 
-                    node_type* LeftNode = node_cache_.get_node(ParentBids.back().second);
-                    assert(LeftNode);
-                    if (LeftNode->size() + Node->size() <= Node->max_nelements())     // can fuse
+                    node_type* left_node = m_node_cache.get_node(parent_bids.back().second);
+                    assert(left_node);
+                    if (left_node->size() + node->size() <= node->max_nelements())
                     {
-                        Node->fuse(*LeftNode);
-                        node_cache_.delete_node(ParentBids.back().second);
-                        ParentBids.pop_back();
+                        // can fuse
+                        STXXL_VERBOSE1("btree bulk construct fuse last nodes:"
+                                       << " left_node.size=" << left_node->size()
+                                       << " node.size=" << node->size());
+
+                        node->fuse(*left_node);
+                        m_node_cache.delete_node(parent_bids.back().second);
+                        parent_bids.pop_back();
                     }
                     else
-                    {       // need to rebalance
-                        const key_type NewSplitter = Node->balance(*LeftNode);
-                        ParentBids.back().first = NewSplitter;
-                        assert(!LeftNode->overflows() && !LeftNode->underflows());
+                    {
+                        // need to rebalance
+                        STXXL_VERBOSE1("btree bulk construct rebalance last nodes:"
+                                       << " left_node.size=" << left_node->size()
+                                       << " node.size=" << node->size());
+
+                        const key_type new_splitter = node->balance(*left_node, false);
+                        parent_bids.back().first = new_splitter;
+
+                        STXXL_VERBOSE1("btree bulk construct after rebalance:"
+                                       << " left_node.size=" << left_node->size()
+                                       << " node.size=" << node->size());
+
+                        assert(!left_node->overflows() && !left_node->underflows());
                     }
                 }
-                assert(!Node->overflows() && !Node->underflows());
+                assert(!node->overflows() && !node->underflows());
+
+                parent_bids.push_back(key_bid_pair(node->back().first, new_bid));
+            }
 
-                ParentBids.push_back(key_bid_pair(Node->back().first, NewBid));
-            } while (it != Bids.end());
+            STXXL_VERBOSE1("btree parent_bids.size()=" << parent_bids.size()
+                                                       << " bids.size()=" << bids.size());
 
-            std::swap(ParentBids, Bids);
+            std::swap(parent_bids, bids);
 
-            assert(nparents == Bids.size() || (nparents - 1) == Bids.size());
+            assert(nparents == bids.size() || (nparents - 1) == bids.size());
 
-            ++height_;
-            STXXL_VERBOSE1("Increasing height to " << height_);
-            if (node_cache_.size() < (height_ - 1))
+            ++m_height;
+            STXXL_VERBOSE1("Increasing height to " << m_height);
+            if (m_node_cache.size() < (m_height - 1))
             {
-                STXXL_THROW2(std::runtime_error, "btree::bulk_construction", "The height of the tree (" << height_ << ") has exceeded the required capacity (" << (node_cache_.size() + 1) << ") of the node cache. Increase the node cache size.");
+                STXXL_THROW2(std::runtime_error, "btree::bulk_construction",
+                             "The height of the tree (" << m_height << ") has exceeded the required capacity (" << (m_node_cache.size() + 1) << ") of the node cache. Increase the node cache size.");
             }
         }
 
-        root_node_.insert(Bids.begin(), Bids.end());
+        m_root_node.insert(bids.begin(), bids.end());
+
+        STXXL_VERBOSE1("btree bulk root_node_.size()=" << m_root_node.size());
     }
 
 public:
     btree(unsigned_type node_cache_size_in_bytes,
-          unsigned_type leaf_cache_size_in_bytes
-          ) :
-        node_cache_(node_cache_size_in_bytes, this, key_compare_),
-        leaf_cache_(leaf_cache_size_in_bytes, this, key_compare_),
-        iterator_map_(this),
-        size_(0),
-        height_(2),
-        prefetching_enabled_(true),
-        bm_(block_manager::get_instance())
+          unsigned_type leaf_cache_size_in_bytes)
+        : m_node_cache(node_cache_size_in_bytes, this, m_key_compare),
+          m_leaf_cache(leaf_cache_size_in_bytes, this, m_key_compare),
+          m_iterator_map(this),
+          m_size(0),
+          m_height(2),
+          m_prefetching_enabled(true),
+          m_bm(block_manager::get_instance())
     {
         STXXL_VERBOSE1("Creating a btree, addr=" << this);
         STXXL_VERBOSE1(" bytes in a node: " << node_bid_type::size);
@@ -409,22 +448,20 @@ public:
         STXXL_VERBOSE1(" size of a node element: " << sizeof(typename node_block_type::value_type));
         STXXL_VERBOSE1(" size of a leaf element: " << sizeof(typename leaf_block_type::value_type));
 
-
         create_empty_leaf();
     }
 
-    btree(const key_compare& c_,
+    btree(const key_compare& key_compare,
           unsigned_type node_cache_size_in_bytes,
-          unsigned_type leaf_cache_size_in_bytes
-          ) :
-        key_compare_(c_),
-        node_cache_(node_cache_size_in_bytes, this, key_compare_),
-        leaf_cache_(leaf_cache_size_in_bytes, this, key_compare_),
-        iterator_map_(this),
-        size_(0),
-        height_(2),
-        prefetching_enabled_(true),
-        bm_(block_manager::get_instance())
+          unsigned_type leaf_cache_size_in_bytes)
+        : m_key_compare(key_compare),
+          m_node_cache(node_cache_size_in_bytes, this, m_key_compare),
+          m_leaf_cache(leaf_cache_size_in_bytes, this, m_key_compare),
+          m_iterator_map(this),
+          m_size(0),
+          m_height(2),
+          m_prefetching_enabled(true),
+          m_bm(block_manager::get_instance())
     {
         STXXL_VERBOSE1("Creating a btree, addr=" << this);
         STXXL_VERBOSE1(" bytes in a node: " << node_bid_type::size);
@@ -438,7 +475,8 @@ public:
         try
         {
             deallocate_children();
-        } catch (...)
+        }
+        catch (...)
         {
             // no exceptions in destructor
         }
@@ -446,7 +484,7 @@ public:
 
     size_type size() const
     {
-        return size_;
+        return m_size;
     }
 
     size_type max_size() const
@@ -456,129 +494,130 @@ public:
 
     bool empty() const
     {
-        return !size_;
+        return !m_size;
     }
 
     std::pair<iterator, bool> insert(const value_type& x)
     {
-        root_node_iterator_type it = root_node_.lower_bound(x.first);
-        assert(!root_node_.empty());
-        assert(it != root_node_.end());
-        if (height_ == 2)                // 'it' points to a leaf
+        root_node_iterator_type it = m_root_node.lower_bound(x.first);
+        assert(!m_root_node.empty());
+        assert(it != m_root_node.end());
+
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Inserting new value into a leaf");
-            leaf_type* Leaf = leaf_cache_.get_node((leaf_bid_type)it->second, true);
-            assert(Leaf);
-            std::pair<key_type, leaf_bid_type> Splitter;
-            std::pair<iterator, bool> result = Leaf->insert(x, Splitter);
+            leaf_type* leaf = m_leaf_cache.get_node((leaf_bid_type)it->second, true);
+            assert(leaf);
+            std::pair<key_type, leaf_bid_type> splitter;
+            std::pair<iterator, bool> result = leaf->insert(x, splitter);
             if (result.second)
-                ++size_;
+                ++m_size;
 
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
             //if(key_compare::max_value() == Splitter.first)
-            if (!(key_compare_(key_compare::max_value(), Splitter.first) ||
-                  key_compare_(Splitter.first, key_compare::max_value())))
+            if (!(m_key_compare(key_compare::max_value(), splitter.first) ||
+                  m_key_compare(splitter.first, key_compare::max_value())))
                 return result;
             // no overflow/splitting happened
 
             STXXL_VERBOSE1("Inserting new value into root node");
 
-            insert_into_root(std::make_pair(Splitter.first, node_bid_type(Splitter.second)));
+            insert_into_root(std::make_pair(splitter.first, node_bid_type(splitter.second)));
 
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Inserting new value into a node");
-        node_type* Node = node_cache_.get_node((node_bid_type)it->second, true);
-        assert(Node);
-        std::pair<key_type, node_bid_type> Splitter;
-        std::pair<iterator, bool> result = Node->insert(x, height_ - 1, Splitter);
+        node_type* node = m_node_cache.get_node((node_bid_type)it->second, true);
+        assert(node);
+        std::pair<key_type, node_bid_type> splitter;
+        std::pair<iterator, bool> result = node->insert(x, m_height - 1, splitter);
         if (result.second)
-            ++size_;
+            ++m_size;
 
-        node_cache_.unfix_node((node_bid_type)it->second);
+        m_node_cache.unfix_node((node_bid_type)it->second);
         //if(key_compare::max_value() == Splitter.first)
-        if (!(key_compare_(key_compare::max_value(), Splitter.first) ||
-              key_compare_(Splitter.first, key_compare::max_value())))
+        if (!(m_key_compare(key_compare::max_value(), splitter.first) ||
+              m_key_compare(splitter.first, key_compare::max_value())))
             return result;
         // no overflow/splitting happened
 
         STXXL_VERBOSE1("Inserting new value into root node");
 
-        insert_into_root(Splitter);
+        insert_into_root(splitter);
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
 
         return result;
     }
 
     iterator begin()
     {
-        root_node_iterator_type it = root_node_.begin();
-        assert(it != root_node_.end());
+        root_node_iterator_type it = m_root_node.begin();
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("btree: retrieving begin() from the first leaf");
-            leaf_type* Leaf = leaf_cache_.get_node((leaf_bid_type)it->second);
-            assert(Leaf);
+            leaf_type* leaf = m_leaf_cache.get_node((leaf_bid_type)it->second);
+            assert(leaf);
 
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
-            return Leaf->begin();
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
+            return leaf->begin();
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("btree: retrieving begin() from the first node");
-        node_type* Node = node_cache_.get_node((node_bid_type)it->second, true);
-        assert(Node);
-        iterator result = Node->begin(height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
+        node_type* node = m_node_cache.get_node((node_bid_type)it->second, true);
+        assert(node);
+        iterator result = node->begin(m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
 
         return result;
     }
 
     const_iterator begin() const
     {
-        root_node_const_iterator_type it = root_node_.begin();
-        assert(it != root_node_.end());
+        root_node_const_iterator_type it = m_root_node.begin();
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("btree: retrieving begin() from the first leaf");
-            leaf_type const* Leaf = leaf_cache_.get_const_node((leaf_bid_type)it->second);
-            assert(Leaf);
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
-            return Leaf->begin();
+            const leaf_type* leaf = m_leaf_cache.get_const_node((leaf_bid_type)it->second);
+            assert(leaf);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
+            return leaf->begin();
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("btree: retrieving begin() from the first node");
-        node_type const* Node = node_cache_.get_const_node((node_bid_type)it->second, true);
-        assert(Node);
-        const_iterator result = Node->begin(height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        const node_type* node = m_node_cache.get_const_node((node_bid_type)it->second, true);
+        assert(node);
+        const_iterator result = node->begin(m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
         return result;
     }
 
     iterator end()
     {
-        return end_iterator;
+        return m_end_iterator;
     }
 
     const_iterator end() const
     {
-        return end_iterator;
+        return m_end_iterator;
     }
 
     data_type& operator [] (const key_type& k)
@@ -588,284 +627,293 @@ public:
 
     iterator find(const key_type& k)
     {
-        root_node_iterator_type it = root_node_.lower_bound(k);
-        assert(it != root_node_.end());
+        root_node_iterator_type it = m_root_node.lower_bound(k);
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Searching in a leaf");
-            leaf_type* Leaf = leaf_cache_.get_node((leaf_bid_type)it->second, true);
-            assert(Leaf);
-            iterator result = Leaf->find(k);
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
+            leaf_type* leaf = m_leaf_cache.get_node((leaf_bid_type)it->second, true);
+            assert(leaf);
+            iterator result = leaf->find(k);
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
             assert(result == end() || result->first == k);
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Searching in a node");
-        node_type* Node = node_cache_.get_node((node_bid_type)it->second, true);
-        assert(Node);
-        iterator result = Node->find(k, height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
+        node_type* node = m_node_cache.get_node((node_bid_type)it->second, true);
+        assert(node);
+        iterator result = node->find(k, m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
 
         assert(result == end() || result->first == k);
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
         return result;
     }
 
     const_iterator find(const key_type& k) const
     {
-        root_node_const_iterator_type it = root_node_.lower_bound(k);
-        assert(it != root_node_.end());
+        root_node_const_iterator_type it = m_root_node.lower_bound(k);
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Searching in a leaf");
-            leaf_type const* Leaf = leaf_cache_.get_const_node((leaf_bid_type)it->second, true);
-            assert(Leaf);
-            const_iterator result = Leaf->find(k);
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
+            const leaf_type* leaf = m_leaf_cache.get_const_node((leaf_bid_type)it->second, true);
+            assert(leaf);
+            const_iterator result = leaf->find(k);
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
             assert(result == end() || result->first == k);
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Searching in a node");
-        node_type const* Node = node_cache_.get_const_node((node_bid_type)it->second, true);
-        assert(Node);
-        const_iterator result = Node->find(k, height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
+        const node_type* node = m_node_cache.get_const_node((node_bid_type)it->second, true);
+        assert(node);
+        const_iterator result = node->find(k, m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
 
         assert(result == end() || result->first == k);
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
         return result;
     }
 
     iterator lower_bound(const key_type& k)
     {
-        root_node_iterator_type it = root_node_.lower_bound(k);
-        assert(it != root_node_.end());
+        root_node_iterator_type it = m_root_node.lower_bound(k);
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Searching lower bound in a leaf");
-            leaf_type* Leaf = leaf_cache_.get_node((leaf_bid_type)it->second, true);
-            assert(Leaf);
-            iterator result = Leaf->lower_bound(k);
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            leaf_type* leaf = m_leaf_cache.get_node((leaf_bid_type)it->second, true);
+            assert(leaf);
+            iterator result = leaf->lower_bound(k);
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Searching lower bound in a node");
-        node_type* Node = node_cache_.get_node((node_bid_type)it->second, true);
-        assert(Node);
-        iterator result = Node->lower_bound(k, height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
+        node_type* node = m_node_cache.get_node((node_bid_type)it->second, true);
+        assert(node);
+        iterator result = node->lower_bound(k, m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
         return result;
     }
 
     const_iterator lower_bound(const key_type& k) const
     {
-        root_node_const_iterator_type it = root_node_.lower_bound(k);
-        assert(it != root_node_.end());
+        root_node_const_iterator_type it = m_root_node.lower_bound(k);
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Searching lower bound in a leaf");
-            leaf_type const* Leaf = leaf_cache_.get_const_node((leaf_bid_type)it->second, true);
-            assert(Leaf);
-            const_iterator result = Leaf->lower_bound(k);
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
+            const leaf_type* leaf = m_leaf_cache.get_const_node((leaf_bid_type)it->second, true);
+            assert(leaf);
+            const_iterator result = leaf->lower_bound(k);
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
 
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Searching lower bound in a node");
-        node_type const* Node = node_cache_.get_const_node((node_bid_type)it->second, true);
-        assert(Node);
-        const_iterator result = Node->lower_bound(k, height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
+        const node_type* node = m_node_cache.get_const_node((node_bid_type)it->second, true);
+        assert(node);
+        const_iterator result = node->lower_bound(k, m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
         return result;
     }
 
     iterator upper_bound(const key_type& k)
     {
-        root_node_iterator_type it = root_node_.upper_bound(k);
-        assert(it != root_node_.end());
+        root_node_iterator_type it = m_root_node.upper_bound(k);
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Searching upper bound in a leaf");
-            leaf_type* Leaf = leaf_cache_.get_node((leaf_bid_type)it->second, true);
+            leaf_type* Leaf = m_leaf_cache.get_node((leaf_bid_type)it->second, true);
             assert(Leaf);
             iterator result = Leaf->upper_bound(k);
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
 
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Searching upper bound in a node");
-        node_type* Node = node_cache_.get_node((node_bid_type)it->second, true);
+        node_type* Node = m_node_cache.get_node((node_bid_type)it->second, true);
         assert(Node);
-        iterator result = Node->upper_bound(k, height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
+        iterator result = Node->upper_bound(k, m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
         return result;
     }
 
     const_iterator upper_bound(const key_type& k) const
     {
-        root_node_const_iterator_type it = root_node_.upper_bound(k);
-        assert(it != root_node_.end());
+        root_node_const_iterator_type it = m_root_node.upper_bound(k);
+        assert(it != m_root_node.end());
 
-        if (height_ == 2)                // 'it' points to a leaf
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Searching upper bound in a leaf");
-            leaf_type const* Leaf = leaf_cache_.get_const_node((leaf_bid_type)it->second, true);
-            assert(Leaf);
-            const_iterator result = Leaf->upper_bound(k);
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
+            const leaf_type* leaf = m_leaf_cache.get_const_node((leaf_bid_type)it->second, true);
+            assert(leaf);
+            const_iterator result = leaf->upper_bound(k);
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
 
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Searching upper bound in a node");
-        node_type const* Node = node_cache_.get_const_node((node_bid_type)it->second, true);
-        assert(Node);
-        const_iterator result = Node->upper_bound(k, height_ - 1);
-        node_cache_.unfix_node((node_bid_type)it->second);
+        const node_type* node = m_node_cache.get_const_node((node_bid_type)it->second, true);
+        assert(node);
+        const_iterator result = node->upper_bound(k, m_height - 1);
+        m_node_cache.unfix_node((node_bid_type)it->second);
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
         return result;
     }
 
     std::pair<iterator, iterator> equal_range(const key_type& k)
     {
-        iterator l = lower_bound(k);                                    // l->first >= k
+        // l->first >= k
+        iterator l = lower_bound(k);
 
-        if (l == end() || key_compare_(k, l->first))                    // if (k < l->first)
+        // if (k < l->first)
+        if (l == end() || m_key_compare(k, l->first))
+            // then upper_bound == lower_bound
             return std::pair<iterator, iterator>(l, l);
-        // then upper_bound == lower_bound
 
         iterator u = l;
-        ++u;                                                            // only one element ==k can exist
+        // only one element ==k can exist
+        ++u;
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
 
-        return std::pair<iterator, iterator>(l, u);                     // then upper_bound == (lower_bound+1)
+        // then upper_bound == (lower_bound+1)
+        return std::pair<iterator, iterator>(l, u);
     }
 
     std::pair<const_iterator, const_iterator> equal_range(const key_type& k) const
     {
-        const_iterator l = lower_bound(k);                              // l->first >= k
+        // l->first >= k
+        const_iterator l = lower_bound(k);
 
-        if (l == end() || key_compare_(k, l->first))                    // if (k < l->first)
+        // if (k < l->first)
+        if (l == end() || m_key_compare(k, l->first))
+            // then upper_bound == lower_bound
             return std::pair<const_iterator, const_iterator>(l, l);
-        // then upper_bound == lower_bound
 
         const_iterator u = l;
-        ++u;                                                            // only one element ==k can exist
+        // only one element ==k can exist
+        ++u;
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
-        return std::pair<const_iterator, const_iterator>(l, u);         // then upper_bound == (lower_bound+1)
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
+        // then upper_bound == (lower_bound+1)
+        return std::pair<const_iterator, const_iterator>(l, u);
     }
 
     size_type erase(const key_type& k)
     {
-        root_node_iterator_type it = root_node_.lower_bound(k);
-        assert(it != root_node_.end());
-        if (height_ == 2)                // 'it' points to a leaf
+        root_node_iterator_type it = m_root_node.lower_bound(k);
+        assert(it != m_root_node.end());
+
+        if (m_height == 2)                // 'it' points to a leaf
         {
             STXXL_VERBOSE1("Deleting key from a leaf");
-            leaf_type* Leaf = leaf_cache_.get_node((leaf_bid_type)it->second, true);
+            leaf_type* Leaf = m_leaf_cache.get_node((leaf_bid_type)it->second, true);
             assert(Leaf);
             size_type result = Leaf->erase(k);
-            size_ -= result;
-            leaf_cache_.unfix_node((leaf_bid_type)it->second);
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            m_size -= result;
+            m_leaf_cache.unfix_node((leaf_bid_type)it->second);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
 
-            if ((!Leaf->underflows()) || root_node_.size() == 1)
+            if ((!Leaf->underflows()) || m_root_node.size() == 1)
                 return result;
             // no underflow or root has a special degree 1 (too few elements)
 
             STXXL_VERBOSE1("btree: Fusing or rebalancing a leaf");
-            fuse_or_balance(it, leaf_cache_);
+            fuse_or_balance(it, m_leaf_cache);
 
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
 
             return result;
         }
 
         // 'it' points to a node
         STXXL_VERBOSE1("Deleting key from a node");
-        assert(root_node_.size() >= 2);
-        node_type* Node = node_cache_.get_node((node_bid_type)it->second, true);
-        assert(Node);
-        size_type result = Node->erase(k, height_ - 1);
-        size_ -= result;
-        node_cache_.unfix_node((node_bid_type)it->second);
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
-        if (!Node->underflows())
+        assert(m_root_node.size() >= 2);
+        node_type* node = m_node_cache.get_node((node_bid_type)it->second, true);
+        assert(node);
+        size_type result = node->erase(k, m_height - 1);
+        m_size -= result;
+        m_node_cache.unfix_node((node_bid_type)it->second);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
+        if (!node->underflows())
             return result;
         // no underflow happened
 
         STXXL_VERBOSE1("Fusing or rebalancing a node");
-        fuse_or_balance(it, node_cache_);
+        fuse_or_balance(it, m_node_cache);
 
-        if (root_node_.size() == 1)
+        if (m_root_node.size() == 1)
         {
             STXXL_VERBOSE1("btree Root has size 1 and height > 2");
             STXXL_VERBOSE1("btree Deallocate root and decrease height");
-            it = root_node_.begin();
-            node_bid_type RootBid = it->second;
+            it = m_root_node.begin();
+            node_bid_type root_bid = it->second;
             assert(it->first == key_compare::max_value());
-            node_type* RootNode = node_cache_.get_node(RootBid);
-            assert(RootNode);
-            assert(RootNode->back().first == key_compare::max_value());
-            root_node_.clear();
-            root_node_.insert(RootNode->block().begin(),
-                              RootNode->block().begin() + RootNode->size());
-
-            node_cache_.delete_node(RootBid);
-            --height_;
-            STXXL_VERBOSE1("btree Decreasing height to " << height_);
+            node_type* root_node = m_node_cache.get_node(root_bid);
+            assert(root_node);
+            assert(root_node->back().first == key_compare::max_value());
+            m_root_node.clear();
+            m_root_node.insert(root_node->block().begin(),
+                               root_node->block().begin() + root_node->size());
+
+            m_node_cache.delete_node(root_bid);
+            --m_height;
+            STXXL_VERBOSE1("btree Decreasing height to " << m_height);
         }
 
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
 
         return result;
     }
@@ -892,21 +940,22 @@ public:
 
     iterator insert(iterator /*pos*/, const value_type& x)
     {
-        return insert(x).first;                 // pos ignored in the current version
+        // pos ignored in the current version
+        return insert(x).first;
     }
 
     void clear()
     {
         deallocate_children();
 
-        root_node_.clear();
+        m_root_node.clear();
 
-        size_ = 0;
-        height_ = 2,
+        m_size = 0;
+        m_height = 2,
 
         create_empty_leaf();
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
     }
 
     template <class InputIterator>
@@ -919,23 +968,22 @@ public:
     }
 
     template <class InputIterator>
-    btree(InputIterator b,
-          InputIterator e,
+    btree(InputIterator begin,
+          InputIterator end,
           const key_compare& c_,
           unsigned_type node_cache_size_in_bytes,
           unsigned_type leaf_cache_size_in_bytes,
           bool range_sorted = false,
           double node_fill_factor = 0.75,
-          double leaf_fill_factor = 0.6
-          ) :
-        key_compare_(c_),
-        node_cache_(node_cache_size_in_bytes, this, key_compare_),
-        leaf_cache_(leaf_cache_size_in_bytes, this, key_compare_),
-        iterator_map_(this),
-        size_(0),
-        height_(2),
-        prefetching_enabled_(true),
-        bm_(block_manager::get_instance())
+          double leaf_fill_factor = 0.6)
+        : m_key_compare(c_),
+          m_node_cache(node_cache_size_in_bytes, this, m_key_compare),
+          m_leaf_cache(leaf_cache_size_in_bytes, this, m_key_compare),
+          m_iterator_map(this),
+          m_size(0),
+          m_height(2),
+          m_prefetching_enabled(true),
+          m_bm(block_manager::get_instance())
     {
         STXXL_VERBOSE1("Creating a btree, addr=" << this);
         STXXL_VERBOSE1(" bytes in a node: " << node_bid_type::size);
@@ -944,34 +992,32 @@ public:
         if (range_sorted == false)
         {
             create_empty_leaf();
-            insert(b, e);
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            insert(begin, end);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return;
         }
 
-        bulk_construction(b, e, node_fill_factor, leaf_fill_factor);
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        bulk_construction(begin, end, node_fill_factor, leaf_fill_factor);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
     }
 
-
     template <class InputIterator>
-    btree(InputIterator b,
-          InputIterator e,
+    btree(InputIterator begin,
+          InputIterator end,
           unsigned_type node_cache_size_in_bytes,
           unsigned_type leaf_cache_size_in_bytes,
           bool range_sorted = false,
           double node_fill_factor = 0.75,
-          double leaf_fill_factor = 0.6
-          ) :
-        node_cache_(node_cache_size_in_bytes, this, key_compare_),
-        leaf_cache_(leaf_cache_size_in_bytes, this, key_compare_),
-        iterator_map_(this),
-        size_(0),
-        height_(2),
-        prefetching_enabled_(true),
-        bm_(block_manager::get_instance())
+          double leaf_fill_factor = 0.6)
+        : m_node_cache(node_cache_size_in_bytes, this, m_key_compare),
+          m_leaf_cache(leaf_cache_size_in_bytes, this, m_key_compare),
+          m_iterator_map(this),
+          m_size(0),
+          m_height(2),
+          m_prefetching_enabled(true),
+          m_bm(block_manager::get_instance())
     {
         STXXL_VERBOSE1("Creating a btree, addr=" << this);
         STXXL_VERBOSE1(" bytes in a node: " << node_bid_type::size);
@@ -980,22 +1026,21 @@ public:
         if (range_sorted == false)
         {
             create_empty_leaf();
-            insert(b, e);
-            assert(leaf_cache_.nfixed() == 0);
-            assert(node_cache_.nfixed() == 0);
+            insert(begin, end);
+            assert(m_leaf_cache.nfixed() == 0);
+            assert(m_node_cache.nfixed() == 0);
             return;
         }
 
-        bulk_construction(b, e, node_fill_factor, leaf_fill_factor);
-        assert(leaf_cache_.nfixed() == 0);
-        assert(node_cache_.nfixed() == 0);
+        bulk_construction(begin, end, node_fill_factor, leaf_fill_factor);
+        assert(m_leaf_cache.nfixed() == 0);
+        assert(m_node_cache.nfixed() == 0);
     }
 
     void erase(iterator first, iterator last)
     {
         if (first == begin() && last == end())
             clear();
-
         else
             while (first != last)
                 erase(first++);
@@ -1003,54 +1048,53 @@ public:
 
     key_compare key_comp() const
     {
-        return key_compare_;
+        return m_key_compare;
     }
     value_compare value_comp() const
     {
-        return value_compare(key_compare_);
+        return value_compare(m_key_compare);
     }
 
     void swap(btree& obj)
     {
-        std::swap(key_compare_, obj.key_compare_);              // OK
+        std::swap(m_key_compare, obj.m_key_compare);   // OK
 
-        std::swap(node_cache_, obj.node_cache_);                // OK
-        std::swap(leaf_cache_, obj.leaf_cache_);                // OK
+        std::swap(m_node_cache, obj.m_node_cache);     // OK
+        std::swap(m_leaf_cache, obj.m_leaf_cache);     // OK
 
+        std::swap(m_iterator_map, obj.m_iterator_map); // must update all iterators
 
-        std::swap(iterator_map_, obj.iterator_map_);            // must update all iterators
-
-        std::swap(end_iterator, obj.end_iterator);
-        std::swap(size_, obj.size_);
-        std::swap(height_, obj.height_);
-        std::swap(alloc_strategy_, obj.alloc_strategy_);
-        std::swap(root_node_, obj.root_node_);
+        std::swap(m_end_iterator, obj.m_end_iterator);
+        std::swap(m_size, obj.m_size);
+        std::swap(m_height, obj.m_height);
+        std::swap(m_alloc_strategy, obj.m_alloc_strategy);
+        std::swap(m_root_node, obj.m_root_node);
     }
 
     void enable_prefetching()
     {
-        prefetching_enabled_ = true;
+        m_prefetching_enabled = true;
     }
     void disable_prefetching()
     {
-        prefetching_enabled_ = false;
+        m_prefetching_enabled = false;
     }
     bool prefetching_enabled()
     {
-        return prefetching_enabled_;
+        return m_prefetching_enabled;
     }
 
     void print_statistics(std::ostream& o) const
     {
         o << "Node cache statistics:" << std::endl;
-        node_cache_.print_statistics(o);
+        m_node_cache.print_statistics(o);
         o << "Leaf cache statistics:" << std::endl;
-        leaf_cache_.print_statistics(o);
+        m_leaf_cache.print_statistics(o);
     }
     void reset_statistics()
     {
-        node_cache_.reset_statistics();
-        leaf_cache_.reset_statistics();
+        m_node_cache.reset_statistics();
+        m_leaf_cache.reset_statistics();
     }
 };
 
@@ -1061,8 +1105,11 @@ template <class KeyType,
           unsigned LogLeafSize,
           class PDAllocStrategy
           >
-inline bool operator == (const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
-                         const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
+inline bool operator ==
+    (const btree<KeyType, DataType, CompareType,
+                 LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
+    const btree<KeyType, DataType, CompareType,
+                LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
 {
     return a.size() == b.size() && std::equal(a.begin(), a.end(), b.begin());
 }
@@ -1074,13 +1121,15 @@ template <class KeyType,
           unsigned LogLeafSize,
           class PDAllocStrategy
           >
-inline bool operator != (const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
-                         const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
+inline bool operator !=
+    (const btree<KeyType, DataType, CompareType,
+                 LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
+    const btree<KeyType, DataType, CompareType,
+                LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
 {
     return !(a == b);
 }
 
-
 template <class KeyType,
           class DataType,
           class CompareType,
@@ -1088,13 +1137,15 @@ template <class KeyType,
           unsigned LogLeafSize,
           class PDAllocStrategy
           >
-inline bool operator < (const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
-                        const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
+inline bool operator <
+    (const btree<KeyType, DataType, CompareType,
+                 LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
+    const btree<KeyType, DataType, CompareType,
+                LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
 {
     return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end());
 }
 
-
 template <class KeyType,
           class DataType,
           class CompareType,
@@ -1102,13 +1153,15 @@ template <class KeyType,
           unsigned LogLeafSize,
           class PDAllocStrategy
           >
-inline bool operator > (const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
-                        const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
+inline bool operator >
+    (const btree<KeyType, DataType, CompareType,
+                 LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
+    const btree<KeyType, DataType, CompareType,
+                LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
 {
     return b < a;
 }
 
-
 template <class KeyType,
           class DataType,
           class CompareType,
@@ -1116,8 +1169,11 @@ template <class KeyType,
           unsigned LogLeafSize,
           class PDAllocStrategy
           >
-inline bool operator <= (const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
-                         const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
+inline bool operator <=
+    (const btree<KeyType, DataType, CompareType,
+                 LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
+    const btree<KeyType, DataType, CompareType,
+                LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
 {
     return !(b < a);
 }
@@ -1129,8 +1185,11 @@ template <class KeyType,
           unsigned LogLeafSize,
           class PDAllocStrategy
           >
-inline bool operator >= (const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
-                         const btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
+inline bool operator >=
+    (const btree<KeyType, DataType, CompareType,
+                 LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
+    const btree<KeyType, DataType, CompareType,
+                LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
 {
     return !(a < b);
 }
@@ -1139,7 +1198,6 @@ inline bool operator >= (const btree<KeyType, DataType, CompareType, LogNodeSize
 
 STXXL_END_NAMESPACE
 
-
 namespace std {
 
 template <class KeyType,
@@ -1149,8 +1207,10 @@ template <class KeyType,
           unsigned LogLeafSize,
           class PDAllocStrategy
           >
-void swap(stxxl::btree::btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
-          stxxl::btree::btree<KeyType, DataType, CompareType, LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
+void swap(stxxl::btree::btree<KeyType, DataType, CompareType,
+                              LogNodeSize, LogLeafSize, PDAllocStrategy>& a,
+          stxxl::btree::btree<KeyType, DataType, CompareType,
+                              LogNodeSize, LogLeafSize, PDAllocStrategy>& b)
 {
     if (&a != &b)
         a.swap(b);
diff --git a/include/stxxl/bits/containers/btree/iterator.h b/include/stxxl/bits/containers/btree/iterator.h
index bc07838..5854d92 100644
--- a/include/stxxl/bits/containers/btree/iterator.h
+++ b/include/stxxl/bits/containers/btree/iterator.h
@@ -16,11 +16,10 @@
 #include <iterator>
 #include <cassert>
 #include <stxxl/bits/verbose.h>
-
+#include <stxxl/bits/common/types.h>
 
 STXXL_BEGIN_NAMESPACE
 
-
 namespace btree {
 
 template <class BTreeType>
@@ -29,7 +28,8 @@ template <class BTreeType>
 class btree_iterator;
 template <class BTreeType>
 class btree_const_iterator;
-template <class KeyType_, class DataType_, class KeyCmp_, unsigned LogNElem_, class BTreeType>
+template <class KeyType, class DataType, class KeyCmp,
+          unsigned LogNElem, class BTreeType>
 class normal_leaf;
 
 template <class BTreeType>
@@ -44,22 +44,24 @@ public:
     typedef std::bidirectional_iterator_tag iterator_category;
     typedef typename btree_type::difference_type difference_type;
 
+    typedef typename btree_type::leaf_type leaf_type;
+
     friend class iterator_map<btree_type>;
-    template <class KeyType_, class DataType_,
-              class KeyCmp_, unsigned LogNElem_, class BTreeType__>
+    template <class KeyType, class DataType,
+              class KeyCmp, unsigned LogNElem, class AnyBTreeType>
     friend class normal_leaf;
 
-    template <class BTreeType_>
-    friend bool operator == (const btree_iterator<BTreeType_>& a,
-                             const btree_const_iterator<BTreeType_>& b);
-    template <class BTreeType_>
-    friend bool operator != (const btree_iterator<BTreeType_>& a,
-                             const btree_const_iterator<BTreeType_>& b);
+    template <class AnyBTreeType>
+    friend bool operator == (const btree_iterator<AnyBTreeType>& a,
+                             const btree_const_iterator<AnyBTreeType>& b);
+    template <class AnyBTreeType>
+    friend bool operator != (const btree_iterator<AnyBTreeType>& a,
+                             const btree_const_iterator<AnyBTreeType>& b);
 
 protected:
-    btree_type* btree_;
+    btree_type* btree;
     bid_type bid;
-    unsigned pos;
+    unsigned_type pos;
 
     btree_iterator_base()
     {
@@ -67,30 +69,29 @@ protected:
         make_invalid();
     }
 
-    btree_iterator_base(
-        btree_type* btree__,
-        const bid_type& b,
-        unsigned p
-        ) : btree_(btree__), bid(b), pos(p)
+    btree_iterator_base(btree_type* _btree,
+                        const bid_type& _bid,
+                        unsigned_type _pos)
+        : btree(_btree), bid(_bid), pos(_pos)
     {
         STXXL_VERBOSE3("btree_iterator_base parameter construct addr=" << this);
-        btree_->iterator_map_.register_iterator(*this);
+        btree->m_iterator_map.register_iterator(*this);
     }
 
     void make_invalid()
     {
-        btree_ = NULL;
+        btree = NULL;
         pos = 0;
     }
 
     btree_iterator_base(const btree_iterator_base& obj)
     {
         STXXL_VERBOSE3("btree_iterator_base constr from" << (&obj) << " to " << this);
-        btree_ = obj.btree_;
+        btree = obj.btree;
         bid = obj.bid;
         pos = obj.pos;
-        if (btree_)
-            btree_->iterator_map_.register_iterator(*this);
+        if (btree)
+            btree->m_iterator_map.register_iterator(*this);
     }
 
     btree_iterator_base& operator = (const btree_iterator_base& obj)
@@ -98,63 +99,63 @@ protected:
         STXXL_VERBOSE3("btree_iterator_base copy from" << (&obj) << " to " << this);
         if (&obj != this)
         {
-            if (btree_)
-                btree_->iterator_map_.unregister_iterator(*this);
+            if (btree)
+                btree->m_iterator_map.unregister_iterator(*this);
 
-            btree_ = obj.btree_;
+            btree = obj.btree;
             bid = obj.bid;
             pos = obj.pos;
-            if (btree_)
-                btree_->iterator_map_.register_iterator(*this);
+            if (btree)
+                btree->m_iterator_map.register_iterator(*this);
         }
         return *this;
     }
 
     reference non_const_access()
     {
-        assert(btree_);
-        typename btree_type::leaf_type * Leaf = btree_->leaf_cache_.get_node(bid);
-        assert(Leaf);
-        return (reference)((*Leaf)[pos]);
+        assert(btree);
+        leaf_type* leaf = btree->m_leaf_cache.get_node(bid);
+        assert(leaf);
+        return (reference)((*leaf)[pos]);
     }
 
     const_reference const_access() const
     {
-        assert(btree_);
-        typename btree_type::leaf_type const* Leaf = btree_->leaf_cache_.get_const_node(bid);
-        assert(Leaf);
-        return (reference)((*Leaf)[pos]);
+        assert(btree);
+        leaf_type const* leaf = btree->m_leaf_cache.get_const_node(bid);
+        assert(leaf);
+        return (reference)((*leaf)[pos]);
     }
 
     bool operator == (const btree_iterator_base& obj) const
     {
-        return bid == obj.bid && pos == obj.pos && btree_ == obj.btree_;
+        return bid == obj.bid && pos == obj.pos && btree == obj.btree;
     }
 
     bool operator != (const btree_iterator_base& obj) const
     {
-        return bid != obj.bid || pos != obj.pos || btree_ != obj.btree_;
+        return bid != obj.bid || pos != obj.pos || btree != obj.btree;
     }
 
     btree_iterator_base & increment()
     {
-        assert(btree_);
+        assert(btree);
         bid_type cur_bid = bid;
-        typename btree_type::leaf_type const* Leaf = btree_->leaf_cache_.get_const_node(bid, true);
-        assert(Leaf);
-        Leaf->increment_iterator(*this);
-        btree_->leaf_cache_.unfix_node(cur_bid);
+        const leaf_type* leaf = btree->m_leaf_cache.get_const_node(bid, true);
+        assert(leaf);
+        leaf->increment_iterator(*this);
+        btree->m_leaf_cache.unfix_node(cur_bid);
         return *this;
     }
 
     btree_iterator_base & decrement()
     {
-        assert(btree_);
+        assert(btree);
         bid_type cur_bid = bid;
-        typename btree_type::leaf_type const* Leaf = btree_->leaf_cache_.get_const_node(bid, true);
-        assert(Leaf);
-        Leaf->decrement_iterator(*this);
-        btree_->leaf_cache_.unfix_node(cur_bid);
+        const leaf_type* leaf = btree->m_leaf_cache.get_const_node(bid, true);
+        assert(leaf);
+        leaf->decrement_iterator(*this);
+        btree->m_leaf_cache.unfix_node(cur_bid);
         return *this;
     }
 
@@ -162,8 +163,8 @@ public:
     virtual ~btree_iterator_base()
     {
         STXXL_VERBOSE3("btree_iterator_base deconst " << this);
-        if (btree_)
-            btree_->iterator_map_.unregister_iterator(*this);
+        if (btree)
+            btree->m_iterator_map.unregister_iterator(*this);
     }
 };
 
@@ -178,22 +179,25 @@ public:
     typedef typename btree_type::const_reference const_reference;
     typedef typename btree_type::pointer pointer;
 
-    template <class KeyType_, class DataType_,
-              class KeyCmp_, unsigned LogNElem_, class BTreeType__>
+    typedef btree_iterator_base<btree_type> base_type;
+
+    template <class KeyType, class DataType,
+              class KeyCmp, unsigned LogNElem, class AnyBTreeType>
     friend class normal_leaf;
 
-    using btree_iterator_base<btree_type>::non_const_access;
+    using base_type::non_const_access;
 
-    btree_iterator() : btree_iterator_base<btree_type>()
+    btree_iterator()
+        : base_type()
     { }
 
-    btree_iterator(const btree_iterator& obj) :
-        btree_iterator_base<btree_type>(obj)
+    btree_iterator(const btree_iterator& obj)
+        : base_type(obj)
     { }
 
     btree_iterator& operator = (const btree_iterator& obj)
     {
-        btree_iterator_base<btree_type>::operator = (obj);
+        base_type::operator = (obj);
         return *this;
     }
 
@@ -209,48 +213,47 @@ public:
 
     bool operator == (const btree_iterator& obj) const
     {
-        return btree_iterator_base<btree_type>::operator == (obj);
+        return base_type::operator == (obj);
     }
 
     bool operator != (const btree_iterator& obj) const
     {
-        return btree_iterator_base<btree_type>::operator != (obj);
+        return base_type::operator != (obj);
     }
 
     btree_iterator& operator ++ ()
     {
-        assert(*this != btree_iterator_base<btree_type>::btree_->end());
-        btree_iterator_base<btree_type>::increment();
+        assert(*this != base_type::btree->end());
+        base_type::increment();
         return *this;
     }
 
     btree_iterator& operator -- ()
     {
-        btree_iterator_base<btree_type>::decrement();
+        base_type::decrement();
         return *this;
     }
 
     btree_iterator operator ++ (int)
     {
-        assert(*this != btree_iterator_base<btree_type>::btree_->end());
+        assert(*this != base_type::btree->end());
         btree_iterator result(*this);
-        btree_iterator_base<btree_type>::increment();
+        base_type::increment();
         return result;
     }
 
     btree_iterator operator -- (int)
     {
         btree_iterator result(*this);
-        btree_iterator_base<btree_type>::decrement();
+        base_type::decrement();
         return result;
     }
 
 private:
-    btree_iterator(
-        btree_type* btree__,
-        const bid_type& b,
-        unsigned p
-        ) : btree_iterator_base<btree_type>(btree__, b, p)
+    btree_iterator(btree_type* _btree,
+                   const bid_type& _bid,
+                   unsigned_type _pos)
+        : base_type(_btree, _bid, _pos)
     { }
 };
 
@@ -266,26 +269,29 @@ public:
     typedef typename btree_type::const_reference reference;
     typedef typename btree_type::const_pointer pointer;
 
-    template <class KeyType_, class DataType_,
-              class KeyCmp_, unsigned LogNElem_, class BTreeType__>
+    typedef btree_iterator_base<btree_type> base_type;
+
+    template <class KeyType, class DataType,
+              class KeyCmp, unsigned LogNElem, class AnyBTreeType>
     friend class normal_leaf;
 
-    using btree_iterator_base<btree_type>::const_access;
+    using base_type::const_access;
 
-    btree_const_iterator() : btree_iterator_base<btree_type>()
+    btree_const_iterator()
+        : base_type()
     { }
 
-    btree_const_iterator(const btree_const_iterator& obj) :
-        btree_iterator_base<btree_type>(obj)
+    btree_const_iterator(const btree_const_iterator& obj)
+        : base_type(obj)
     { }
 
-    btree_const_iterator(const iterator& obj) :
-        btree_iterator_base<btree_type>(obj)
+    btree_const_iterator(const iterator& obj)
+        : base_type(obj)
     { }
 
     btree_const_iterator& operator = (const btree_const_iterator& obj)
     {
-        btree_iterator_base<btree_type>::operator = (obj);
+        base_type::operator = (obj);
         return *this;
     }
 
@@ -299,61 +305,59 @@ public:
         return &(const_access());
     }
 
-
     bool operator == (const iterator& obj) const
     {
-        return btree_iterator_base<btree_type>::operator == (obj);
+        return base_type::operator == (obj);
     }
 
     bool operator != (const iterator& obj) const
     {
-        return btree_iterator_base<btree_type>::operator != (obj);
+        return base_type::operator != (obj);
     }
 
     bool operator == (const btree_const_iterator& obj) const
     {
-        return btree_iterator_base<btree_type>::operator == (obj);
+        return base_type::operator == (obj);
     }
 
     bool operator != (const btree_const_iterator& obj) const
     {
-        return btree_iterator_base<btree_type>::operator != (obj);
+        return base_type::operator != (obj);
     }
 
     btree_const_iterator& operator ++ ()
     {
-        assert(*this != btree_iterator_base<btree_type>::btree_->end());
-        btree_iterator_base<btree_type>::increment();
+        assert(*this != base_type::btree->end());
+        base_type::increment();
         return *this;
     }
 
     btree_const_iterator& operator -- ()
     {
-        btree_iterator_base<btree_type>::decrement();
+        base_type::decrement();
         return *this;
     }
 
     btree_const_iterator operator ++ (int)
     {
-        assert(*this != btree_iterator_base<btree_type>::btree_->end());
+        assert(*this != base_type::btree->end());
         btree_const_iterator result(*this);
-        btree_iterator_base<btree_type>::increment();
+        base_type::increment();
         return result;
     }
 
     btree_const_iterator operator -- (int)
     {
         btree_const_iterator result(*this);
-        btree_iterator_base<btree_type>::decrement();
+        base_type::decrement();
         return result;
     }
 
 private:
-    btree_const_iterator(
-        btree_type* btree__,
-        const bid_type& b,
-        unsigned p
-        ) : btree_iterator_base<btree_type>(btree__, b, p)
+    btree_const_iterator(btree_type* _btree,
+                         const bid_type& _bid,
+                         unsigned_type _pos)
+        : base_type(_btree, _bid, _pos)
     { }
 };
 
diff --git a/include/stxxl/bits/containers/btree/iterator_map.h b/include/stxxl/bits/containers/btree/iterator_map.h
index 92f2981..68991e5 100644
--- a/include/stxxl/bits/containers/btree/iterator_map.h
+++ b/include/stxxl/bits/containers/btree/iterator_map.h
@@ -19,7 +19,6 @@
 #include <stxxl/bits/containers/btree/iterator.h>
 #include <stxxl/bits/common/error_handling.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace btree {
@@ -36,9 +35,10 @@ private:
     struct Key
     {
         bid_type bid;
-        unsigned pos;
+        unsigned_type pos;
         Key() { }
-        Key(const bid_type& b, unsigned p) : bid(b), pos(p) { }
+        Key(const bid_type& b, unsigned_type p)
+            : bid(b), pos(p) { }
     };
 
     struct bid_comp
@@ -59,55 +59,55 @@ private:
 
     typedef std::multimap<Key, iterator_base*, KeyCmp> multimap_type;
 
-    multimap_type It2Addr_;
-    btree_type* btree_;
+    multimap_type m_it2addr;
+    btree_type* m_btree;
 
     typedef typename multimap_type::value_type pair_type;
     typedef typename multimap_type::iterator mmiterator_type;
     typedef typename multimap_type::const_iterator mmconst_iterator_type;
 
-
-    // changes btree pointer in all contained iterators
+    //! changes btree pointer in all contained iterators
     void change_btree_pointers(btree_type* b)
     {
-        mmconst_iterator_type it = It2Addr_.begin();
-        for ( ; it != It2Addr_.end(); ++it)
+        for (mmconst_iterator_type it = m_it2addr.begin();
+             it != m_it2addr.end(); ++it)
         {
-            (it->second)->btree_ = b;
+            (it->second)->btree = b;
         }
     }
 
 public:
-    iterator_map(btree_type* b) : btree_(b)
+    iterator_map(btree_type* b)
+        : m_btree(b)
     { }
 
     void register_iterator(iterator_base& it)
     {
         STXXL_VERBOSE2("btree::iterator_map register_iterator addr=" << &it <<
                        " BID: " << it.bid << " POS: " << it.pos);
-        It2Addr_.insert(pair_type(Key(it.bid, it.pos), &it));
+        m_it2addr.insert(pair_type(Key(it.bid, it.pos), &it));
     }
     void unregister_iterator(iterator_base& it)
     {
         STXXL_VERBOSE2("btree::iterator_map unregister_iterator addr=" << &it <<
                        " BID: " << it.bid << " POS: " << it.pos);
-        assert(!It2Addr_.empty());
+        assert(!m_it2addr.empty());
         Key key(it.bid, it.pos);
         std::pair<mmiterator_type, mmiterator_type> range =
-            It2Addr_.equal_range(key);
+            m_it2addr.equal_range(key);
 
         assert(range.first != range.second);
 
         mmiterator_type i = range.first;
         for ( ; i != range.second; ++i)
         {
-            assert(it.bid == (*i).first.bid);
-            assert(it.pos == (*i).first.pos);
+            assert(it.bid == i->first.bid);
+            assert(it.pos == i->first.pos);
 
-            if ((*i).second == &it)
+            if (i->second == &it)
             {
                 // found it
-                It2Addr_.erase(i);
+                m_it2addr.erase(i);
                 return;
             }
         }
@@ -116,36 +116,37 @@ public:
     }
     template <class OutputContainer>
     void find(const bid_type& bid,
-              unsigned first_pos,
-              unsigned last_pos,
-              OutputContainer& out
-              )
+              unsigned_type first_pos,
+              unsigned_type last_pos,
+              OutputContainer& out)
     {
         Key firstkey(bid, first_pos);
         Key lastkey(bid, last_pos);
-        mmconst_iterator_type begin = It2Addr_.lower_bound(firstkey);
-        mmconst_iterator_type end = It2Addr_.upper_bound(lastkey);
+        mmconst_iterator_type begin = m_it2addr.lower_bound(firstkey);
+        mmconst_iterator_type end = m_it2addr.upper_bound(lastkey);
 
-        mmconst_iterator_type i = begin;
-        for ( ; i != end; ++i)
+        for (mmconst_iterator_type i = begin;
+             i != end; ++i)
         {
-            assert(bid == (*i).first.bid);
-            out.push_back((*i).second);
+            assert(bid == i->first.bid);
+            out.push_back(i->second);
         }
     }
 
     virtual ~iterator_map()
     {
-        mmconst_iterator_type it = It2Addr_.begin();
-        for ( ; it != It2Addr_.end(); ++it)
-            (it->second)->make_invalid();
+        for (mmconst_iterator_type it = m_it2addr.begin();
+             it != m_it2addr.end(); ++it)
+        {
+            it->second->make_invalid();
+        }
     }
 
     void swap(iterator_map& obj)
     {
-        std::swap(It2Addr_, obj.It2Addr_);
-        change_btree_pointers(btree_);
-        obj.change_btree_pointers(obj.btree_);
+        std::swap(m_it2addr, obj.m_it2addr);
+        change_btree_pointers(m_btree);
+        obj.change_btree_pointers(obj.m_btree);
     }
 };
 
@@ -153,7 +154,6 @@ public:
 
 STXXL_END_NAMESPACE
 
-
 namespace std {
 
 template <class BTreeType>
diff --git a/include/stxxl/bits/containers/btree/leaf.h b/include/stxxl/bits/containers/btree/leaf.h
index 77a93cf..d7c619f 100644
--- a/include/stxxl/bits/containers/btree/leaf.h
+++ b/include/stxxl/bits/containers/btree/leaf.h
@@ -16,7 +16,6 @@
 #include <stxxl/bits/containers/btree/iterator.h>
 #include <stxxl/bits/containers/btree/node_cache.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace btree {
@@ -24,32 +23,34 @@ namespace btree {
 template <class NodeType, class BTreeType>
 class node_cache;
 
-template <class KeyType_, class DataType_, class KeyCmp_, unsigned RawSize_, class BTreeType>
+template <class KeyType, class DataType, class KeyCmp, unsigned RawSize, class BTreeType>
 class normal_leaf : private noncopyable
 {
 public:
-    typedef normal_leaf<KeyType_, DataType_, KeyCmp_, RawSize_, BTreeType> SelfType;
+    typedef normal_leaf<KeyType, DataType, KeyCmp, RawSize, BTreeType> self_type;
 
-    friend class node_cache<SelfType, BTreeType>;
+    friend class node_cache<self_type, BTreeType>;
 
-    typedef KeyType_ key_type;
-    typedef DataType_ data_type;
-    typedef KeyCmp_ key_compare;
+    typedef KeyType key_type;
+    typedef DataType data_type;
+    typedef KeyCmp key_compare;
     typedef std::pair<key_type, data_type> value_type;
     typedef value_type& reference;
     typedef const value_type& const_reference;
 
     enum {
-        raw_size = RawSize_
+        raw_size = RawSize
     };
+
     typedef BID<raw_size> bid_type;
-    struct InfoType
+
+    struct metainfo_type
     {
         bid_type me, pred, succ;
         unsigned cur_size;
     };
 
-    typedef typed_block<raw_size, value_type, 0, InfoType> block_type;
+    typedef typed_block<raw_size, value_type, 0, metainfo_type> block_type;
     enum {
         nelements = block_type::size - 1,
         max_size = nelements,
@@ -78,97 +79,97 @@ public:
     };
 
 private:
-    block_type* block_;
-    btree_type* btree_;
+    block_type* m_block;
+    btree_type* m_btree;
 
-    key_compare cmp_;
-    value_compare vcmp_;
+    key_compare m_cmp;
+    value_compare m_vcmp;
 
     void split(std::pair<key_type, bid_type>& splitter)
     {
-        bid_type NewBid;
-        btree_->leaf_cache_.get_new_node(NewBid);                         // new (left) leaf
-        normal_leaf* NewLeaf = btree_->leaf_cache_.get_node(NewBid, true);
-        assert(NewLeaf);
+        bid_type new_bid;
+        m_btree->m_leaf_cache.get_new_node(new_bid);                         // new (left) leaf
+        normal_leaf* new_leaf = m_btree->m_leaf_cache.get_node(new_bid, true);
+        assert(new_leaf);
 
         // update links
-        NewLeaf->succ() = my_bid();
-        normal_leaf* PredLeaf = NULL;
+        new_leaf->succ() = my_bid();
+        normal_leaf* pred_leaf = NULL;
         if (pred().valid())
         {
-            NewLeaf->pred() = pred();
-            PredLeaf = btree_->leaf_cache_.get_node(pred());
-            assert(PredLeaf);
-            assert(vcmp_(PredLeaf->back(), front()));
-            PredLeaf->succ() = NewBid;
+            new_leaf->pred() = pred();
+            pred_leaf = m_btree->m_leaf_cache.get_node(pred());
+            assert(pred_leaf);
+            assert(m_vcmp(pred_leaf->back(), front()));
+            pred_leaf->succ() = new_bid;
         }
-        pred() = NewBid;
+        pred() = new_bid;
 
-        std::vector<iterator_base*> Iterators2Fix;
-        btree_->iterator_map_.find(my_bid(), 0, size(), Iterators2Fix);
+        typedef std::vector<iterator_base*> iterators2fix_type;
+        iterators2fix_type iterators2fix;
+        m_btree->m_iterator_map.find(my_bid(), 0, size(), iterators2fix);
 
         const unsigned end_of_smaller_part = size() / 2;
 
-        splitter.first = ((*block_)[end_of_smaller_part - 1]).first;
-        splitter.second = NewBid;
+        splitter.first = ((*m_block)[end_of_smaller_part - 1]).first;
+        splitter.second = new_bid;
 
         const unsigned old_size = size();
         // copy the smaller part
-        std::copy(block_->begin(), block_->begin() + end_of_smaller_part, NewLeaf->block_->begin());
-        NewLeaf->block_->info.cur_size = end_of_smaller_part;
+        std::copy(m_block->begin(), m_block->begin() + end_of_smaller_part,
+                  new_leaf->m_block->begin());
+        new_leaf->m_block->info.cur_size = end_of_smaller_part;
         // copy the larger part
-        std::copy(block_->begin() + end_of_smaller_part,
-                  block_->begin() + old_size, block_->begin());
-        block_->info.cur_size = old_size - end_of_smaller_part;
-        assert(size() + NewLeaf->size() == old_size);
+        std::copy(m_block->begin() + end_of_smaller_part,
+                  m_block->begin() + old_size, m_block->begin());
+        m_block->info.cur_size = old_size - end_of_smaller_part;
+        assert(size() + new_leaf->size() == old_size);
 
         // fix iterators
-        typename std::vector<iterator_base*>::iterator it2fix = Iterators2Fix.begin();
-        for ( ; it2fix != Iterators2Fix.end(); ++it2fix)
+        for (typename iterators2fix_type::iterator it2fix = iterators2fix.begin();
+             it2fix != iterators2fix.end(); ++it2fix)
         {
-            btree_->iterator_map_.unregister_iterator(**it2fix);
+            m_btree->m_iterator_map.unregister_iterator(**it2fix);
 
             if ((*it2fix)->pos < end_of_smaller_part)     // belongs to the smaller part
-                (*it2fix)->bid = NewBid;
+                (*it2fix)->bid = new_bid;
 
             else
                 (*it2fix)->pos -= end_of_smaller_part;
 
-
-            btree_->iterator_map_.register_iterator(**it2fix);
+            m_btree->m_iterator_map.register_iterator(**it2fix);
         }
 
-
         STXXL_VERBOSE1("btree::normal_leaf split leaf " << this
                                                         << " splitter: " << splitter.first);
 
 #if STXXL_VERBOSE_LEVEL >= 1
-        if (PredLeaf)
+        if (pred_leaf)
         {
-            STXXL_VERBOSE1("btree::normal_leaf pred_part.smallest    = " << PredLeaf->front().first);
-            STXXL_VERBOSE1("btree::normal_leaf pred_part.largest     = " << PredLeaf->back().first);
+            STXXL_VERBOSE1("btree::normal_leaf pred_part.smallest    = " << pred_leaf->front().first);
+            STXXL_VERBOSE1("btree::normal_leaf pred_part.largest     = " << pred_leaf->back().first);
         }
 #endif
-        STXXL_VERBOSE1("btree::normal_leaf smaller_part.smallest = " << NewLeaf->front().first);
-        STXXL_VERBOSE1("btree::normal_leaf smaller_part.largest  = " << NewLeaf->back().first);
+        STXXL_VERBOSE1("btree::normal_leaf smaller_part.smallest = " << new_leaf->front().first);
+        STXXL_VERBOSE1("btree::normal_leaf smaller_part.largest  = " << new_leaf->back().first);
         STXXL_VERBOSE1("btree::normal_leaf larger_part.smallest  = " << front().first);
         STXXL_VERBOSE1("btree::normal_leaf larger_part.largest   = " << back().first);
 
-        btree_->leaf_cache_.unfix_node(NewBid);
+        m_btree->m_leaf_cache.unfix_node(new_bid);
     }
 
 public:
     virtual ~normal_leaf()
     {
-        delete block_;
+        delete m_block;
     }
 
-    normal_leaf(btree_type* btree__,
-                key_compare cmp) :
-        block_(new block_type),
-        btree_(btree__),
-        cmp_(cmp),
-        vcmp_(cmp)
+    normal_leaf(btree_type* btree,
+                key_compare cmp)
+        : m_block(new block_type),
+          m_btree(btree),
+          m_cmp(cmp),
+          m_vcmp(cmp)
     {
         assert(min_nelements() >= 2);
         assert(2 * min_nelements() - 1 <= max_nelements());
@@ -176,40 +177,39 @@ public:
         assert(unsigned(block_type::size) >= nelements + 1);                       // extra space for an overflow
     }
 
-    bool overflows() const { return block_->info.cur_size > max_nelements(); }
-    bool underflows() const { return block_->info.cur_size < min_nelements(); }
-
-    unsigned max_nelements() const { return max_size; }
-    unsigned min_nelements() const { return min_size; }
+    bool overflows() const { return m_block->info.cur_size > max_nelements(); }
+    bool underflows() const { return m_block->info.cur_size < min_nelements(); }
 
+    static unsigned max_nelements() { return max_size; }
+    static unsigned min_nelements() { return min_size; }
 
     bid_type & succ()
     {
-        return block_->info.succ;
+        return m_block->info.succ;
     }
     bid_type & pred()
     {
-        return block_->info.pred;
+        return m_block->info.pred;
     }
 
     const bid_type & succ() const
     {
-        return block_->info.succ;
+        return m_block->info.succ;
     }
     const bid_type & pred() const
     {
-        return block_->info.pred;
+        return m_block->info.pred;
     }
 
     /*
        template <class InputIterator>
        normal_leaf(InputIterator begin_, InputIterator end_,
-            btree_type * btree__,
+            btree_type * btree,
             key_compare cmp):
-            block_(new block_type),
-            btree_(btree__),
-            cmp_(cmp),
-            vcmp_(cmp)
+            m_block(new block_type),
+            m_btree(btree),
+            m_cmp(cmp),
+            m_vcmp(cmp)
        {
             assert(min_nelements() >=2);
             assert(2*min_nelements() - 1 <= max_nelements());
@@ -220,30 +220,30 @@ public:
             assert(new_size <= max_nelements());
             assert(new_size >= min_nelements());
 
-            std::copy(begin_,end_,block_->begin());
-            assert(stxxl::is_sorted(block_->begin(),block_->begin() + new_size, vcmp_));
-            block_->info.cur_size = new_size;
+            std::copy(begin_,end_,m_block->begin());
+            assert(stxxl::is_sorted(m_block->begin(),m_block->begin() + new_size, m_vcmp));
+            m_block->info.cur_size = new_size;
        }*/
 
     unsigned size() const
     {
-        return block_->info.cur_size;
+        return m_block->info.cur_size;
     }
 
     const bid_type & my_bid() const
     {
-        return block_->info.me;
+        return m_block->info.me;
     }
 
     void save()
     {
-        request_ptr req = block_->write(my_bid());
+        request_ptr req = m_block->write(my_bid());
         req->wait();
     }
 
     request_ptr load(const bid_type& bid)
     {
-        request_ptr req = block_->read(bid);
+        request_ptr req = m_block->read(bid);
         req->wait();
         assert(bid == my_bid());
         return req;
@@ -251,45 +251,45 @@ public:
 
     request_ptr prefetch(const bid_type& bid)
     {
-        return block_->read(bid);
+        return m_block->read(bid);
     }
 
     void init(const bid_type& my_bid_)
     {
-        block_->info.me = my_bid_;
-        block_->info.succ = bid_type();
-        block_->info.pred = bid_type();
-        block_->info.cur_size = 0;
+        m_block->info.me = my_bid_;
+        m_block->info.succ = bid_type();
+        m_block->info.pred = bid_type();
+        m_block->info.cur_size = 0;
     }
 
-    reference operator [] (int i)
+    reference operator [] (unsigned_type i)
     {
-        return (*block_)[i];
+        return (*m_block)[i];
     }
 
-    const_reference operator [] (int i) const
+    const_reference operator [] (unsigned_type i) const
     {
-        return (*block_)[i];
+        return (*m_block)[i];
     }
 
     reference back()
     {
-        return (*block_)[size() - 1];
+        return (*m_block)[size() - 1];
     }
 
     reference front()
     {
-        return *(block_->begin());
+        return *(m_block->begin());
     }
 
     const_reference back() const
     {
-        return (*block_)[size() - 1];
+        return (*m_block)[size() - 1];
     }
 
     const_reference front() const
     {
-        return *(block_->begin());
+        return *(m_block->begin());
     }
 
     void dump()
@@ -307,17 +307,17 @@ public:
         splitter.first = key_compare::max_value();
 
         typename block_type::iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), x, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), x, m_vcmp);
 
-        if (!(vcmp_(*it, x) || vcmp_(x, *it)) && it != (block_->begin() + size()))                    // *it == x
+        if (!(m_vcmp(*it, x) || m_vcmp(x, *it)) && it != (m_block->begin() + size()))                    // *it == x
         {
             // already exists
             return std::pair<iterator, bool>(
-                iterator(btree_, my_bid(), unsigned(it - block_->begin())),
+                iterator(m_btree, my_bid(), unsigned(it - m_block->begin())),
                 false);
         }
 
-        typename block_type::iterator cur = block_->begin() + size() - 1;
+        typename block_type::iterator cur = m_block->begin() + size() - 1;
 
         for ( ; cur >= it; --cur)
             *(cur + 1) = *cur;
@@ -325,19 +325,19 @@ public:
 
         *it = x;
 
-        std::vector<iterator_base*> Iterators2Fix;
-        btree_->iterator_map_.find(my_bid(), unsigned(it - block_->begin()), size(), Iterators2Fix);
-        typename std::vector<iterator_base*>::iterator it2fix = Iterators2Fix.begin();
-        for ( ; it2fix != Iterators2Fix.end(); ++it2fix)
+        std::vector<iterator_base*> iterators2fix;
+        m_btree->m_iterator_map.find(my_bid(), unsigned(it - m_block->begin()), size(), iterators2fix);
+        typename std::vector<iterator_base*>::iterator it2fix = iterators2fix.begin();
+        for ( ; it2fix != iterators2fix.end(); ++it2fix)
         {
-            btree_->iterator_map_.unregister_iterator(**it2fix);
-            ++((*it2fix)->pos);                            // fixing iterators
-            btree_->iterator_map_.register_iterator(**it2fix);
+            m_btree->m_iterator_map.unregister_iterator(**it2fix);
+            ++((*it2fix)->pos);                          // fixing iterators
+            m_btree->m_iterator_map.register_iterator(**it2fix);
         }
 
-        ++(block_->info.cur_size);
+        ++(m_block->info.cur_size);
 
-        std::pair<iterator, bool> result(iterator(btree_, my_bid(), unsigned(it - block_->begin())), true);
+        std::pair<iterator, bool> result(iterator(m_btree, my_bid(), unsigned(it - m_block->begin())), true);
 
         if (size() <= max_nelements())
         {
@@ -355,17 +355,17 @@ public:
 
     iterator begin()
     {
-        return iterator(btree_, my_bid(), 0);
+        return iterator(m_btree, my_bid(), 0);
     }
 
     const_iterator begin() const
     {
-        return const_iterator(btree_, my_bid(), 0);
+        return const_iterator(m_btree, my_bid(), 0);
     }
 
     iterator end()
     {
-        return iterator(btree_, my_bid(), size());
+        return iterator(m_btree, my_bid(), size());
     }
 
     void increment_iterator(iterator_base& it) const
@@ -373,7 +373,7 @@ public:
         assert(it.bid == my_bid());
         assert(it.pos != size());
 
-        btree_->iterator_map_.unregister_iterator(it);
+        m_btree->m_iterator_map.unregister_iterator(it);
 
         ++(it.pos);
         if (it.pos == size() && succ().valid())
@@ -382,324 +382,318 @@ public:
             STXXL_VERBOSE1("btree::normal_leaf jumping to the next block");
             it.pos = 0;
             it.bid = succ();
-        } else if (it.pos == 1 && btree_->prefetching_enabled_)                        // increment of pos from 0 to 1
+        }
+        // increment of pos from 0 to 1
+        else if (it.pos == 1 && m_btree->m_prefetching_enabled)
         {
             // prefetch the succ leaf
             if (succ().valid())
-                btree_->leaf_cache_.prefetch_node(succ());
+                m_btree->m_leaf_cache.prefetch_node(succ());
         }
-        btree_->iterator_map_.register_iterator(it);
+        m_btree->m_iterator_map.register_iterator(it);
     }
 
     void decrement_iterator(iterator_base& it) const
     {
         assert(it.bid == my_bid());
 
-        btree_->iterator_map_.unregister_iterator(it);
+        m_btree->m_iterator_map.unregister_iterator(it);
 
         if (it.pos == 0)
         {
             assert(pred().valid());
 
             it.bid = pred();
-            normal_leaf const* PredLeaf = btree_->leaf_cache_.get_const_node(pred(), true);
-            assert(PredLeaf);
-            it.pos = PredLeaf->size() - 1;
-
-            // prefetch the pred leaf of PredLeaf
-            if (btree_->prefetching_enabled_ && PredLeaf->pred().valid())
-                btree_->leaf_cache_.prefetch_node(PredLeaf->pred());
+            normal_leaf const* pred_leaf = m_btree->m_leaf_cache.get_const_node(pred(), true);
+            assert(pred_leaf);
+            it.pos = pred_leaf->size() - 1;
 
+            // prefetch the pred leaf of pred_leaf
+            if (m_btree->m_prefetching_enabled && pred_leaf->pred().valid())
+                m_btree->m_leaf_cache.prefetch_node(pred_leaf->pred());
 
-            btree_->leaf_cache_.unfix_node(pred());
+            m_btree->m_leaf_cache.unfix_node(pred());
         }
         else
             --it.pos;
 
-
-        btree_->iterator_map_.register_iterator(it);
+        m_btree->m_iterator_map.register_iterator(it);
     }
 
     iterator find(const key_type& k)
     {
-        value_type searchVal(k, data_type());
+        value_type search_val(k, data_type());
         typename block_type::iterator lb =
-            std::lower_bound(block_->begin(), block_->begin() + size(), searchVal, vcmp_);
-        if (lb == block_->begin() + size() || lb->first != k)
-            return btree_->end();
-
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), search_val, m_vcmp);
+        if (lb == m_block->begin() + size() || lb->first != k)
+            return m_btree->end();
 
-        return iterator(btree_, my_bid(), unsigned(lb - block_->begin()));
+        return iterator(m_btree, my_bid(), unsigned(lb - m_block->begin()));
     }
 
     const_iterator find(const key_type& k) const
     {
-        value_type searchVal(k, data_type());
+        value_type search_val(k, data_type());
         typename block_type::iterator lb =
-            std::lower_bound(block_->begin(), block_->begin() + size(), searchVal, vcmp_);
-        if (lb == block_->begin() + size() || lb->first != k)
-            return btree_->end();
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), search_val, m_vcmp);
+        if (lb == m_block->begin() + size() || lb->first != k)
+            return m_btree->end();
 
-
-        return const_iterator(btree_, my_bid(), unsigned(lb - block_->begin()));
+        return const_iterator(m_btree, my_bid(), unsigned(lb - m_block->begin()));
     }
 
     iterator lower_bound(const key_type& k)
     {
-        value_type searchVal(k, data_type());
+        value_type search_val(k, data_type());
 
         typename block_type::iterator lb =
-            std::lower_bound(block_->begin(), block_->begin() + size(), searchVal, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), search_val, m_vcmp);
 
         // lower_bound is in the succ block
-        if (lb == block_->begin() + size() && succ().valid())
+        if (lb == m_block->begin() + size() && succ().valid())
         {
-            return iterator(btree_, succ(), 0);
+            return iterator(m_btree, succ(), 0);
         }
 
-        return iterator(btree_, my_bid(), unsigned(lb - block_->begin()));
+        return iterator(m_btree, my_bid(), unsigned(lb - m_block->begin()));
     }
 
     const_iterator lower_bound(const key_type& k) const
     {
-        value_type searchVal(k, data_type());
+        value_type search_val(k, data_type());
         typename block_type::iterator lb =
-            std::lower_bound(block_->begin(), block_->begin() + size(), searchVal, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), search_val, m_vcmp);
 
         // lower_bound is in the succ block
-        if (lb == block_->begin() + size() && succ().valid())
+        if (lb == m_block->begin() + size() && succ().valid())
         {
-            return iterator(btree_, succ(), 0);
+            return iterator(m_btree, succ(), 0);
         }
 
-        return const_iterator(btree_, my_bid(), unsigned(lb - block_->begin()));
+        return const_iterator(m_btree, my_bid(), unsigned(lb - m_block->begin()));
     }
 
     iterator upper_bound(const key_type& k)
     {
-        value_type searchVal(k, data_type());
+        value_type search_val(k, data_type());
         typename block_type::iterator lb =
-            std::upper_bound(block_->begin(), block_->begin() + size(), searchVal, vcmp_);
+            std::upper_bound(m_block->begin(), m_block->begin() + size(), search_val, m_vcmp);
 
         // upper_bound is in the succ block
-        if (lb == block_->begin() + size() && succ().valid())
+        if (lb == m_block->begin() + size() && succ().valid())
         {
-            return iterator(btree_, succ(), 0);
+            return iterator(m_btree, succ(), 0);
         }
 
-        return iterator(btree_, my_bid(), unsigned(lb - block_->begin()));
+        return iterator(m_btree, my_bid(), unsigned(lb - m_block->begin()));
     }
 
     const_iterator upper_bound(const key_type& k) const
     {
-        value_type searchVal(k, data_type());
+        value_type search_val(k, data_type());
         typename block_type::iterator lb =
-            std::upper_bound(block_->begin(), block_->begin() + size(), searchVal, vcmp_);
+            std::upper_bound(m_block->begin(), m_block->begin() + size(), search_val, m_vcmp);
 
         // upper_bound is in the succ block
-        if (lb == block_->begin() + size() && succ().valid())
+        if (lb == m_block->begin() + size() && succ().valid())
         {
-            return const_iterator(btree_, succ(), 0);
+            return const_iterator(m_btree, succ(), 0);
         }
 
-        return const_iterator(btree_, my_bid(), unsigned(lb - block_->begin()));
+        return const_iterator(m_btree, my_bid(), unsigned(lb - m_block->begin()));
     }
 
     size_type erase(const key_type& k)
     {
-        value_type searchVal(k, data_type());
+        value_type search_val(k, data_type());
         typename block_type::iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), searchVal, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), search_val, m_vcmp);
 
-        if (it == block_->begin() + size() || it->first != k)
+        if (it == m_block->begin() + size() || it->first != k)
             return 0;
         // no such element
 
         // move elements one position left
-        std::copy(it + 1, block_->begin() + size(), it);
+        std::copy(it + 1, m_block->begin() + size(), it);
 
-        std::vector<iterator_base*> Iterators2Fix;
-        btree_->iterator_map_.find(my_bid(), unsigned(it + 1 - block_->begin()), size(), Iterators2Fix);
-        typename std::vector<iterator_base*>::iterator it2fix = Iterators2Fix.begin();
-        for ( ; it2fix != Iterators2Fix.end(); ++it2fix)
+        std::vector<iterator_base*> iterators2fix;
+        m_btree->m_iterator_map.find(my_bid(), unsigned(it + 1 - m_block->begin()), size(), iterators2fix);
+        typename std::vector<iterator_base*>::iterator it2fix = iterators2fix.begin();
+        for ( ; it2fix != iterators2fix.end(); ++it2fix)
         {
             STXXL_VERBOSE2("btree::normal_leaf updating iterator " << (*it2fix) << " (pos--)");
-            btree_->iterator_map_.unregister_iterator(**it2fix);
-            --((*it2fix)->pos);                            // fixing iterators
-            btree_->iterator_map_.register_iterator(**it2fix);
+            m_btree->m_iterator_map.unregister_iterator(**it2fix);
+            --((*it2fix)->pos);                          // fixing iterators
+            m_btree->m_iterator_map.register_iterator(**it2fix);
         }
 
-        --(block_->info.cur_size);
+        --(m_block->info.cur_size);
 
         return 1;
     }
 
-    void fuse(const normal_leaf& Src)
+    void fuse(const normal_leaf& src)
     {
         STXXL_VERBOSE1("btree::normal_leaf Fusing");
-        assert(vcmp_(Src.back(), front()));
-        const unsigned SrcSize = Src.size();
+        assert(m_vcmp(src.back(), front()));
+        const unsigned src_size = src.size();
 
-        typename block_type::iterator cur = block_->begin() + size() - 1;
-        typename block_type::const_iterator begin = block_->begin();
+        typename block_type::iterator cur = m_block->begin() + size() - 1;
+        typename block_type::const_iterator begin = m_block->begin();
 
         for ( ; cur >= begin; --cur)
-            *(cur + SrcSize) = *cur;
+            *(cur + src_size) = *cur;
         // move elements to make space for Src elements
 
         // copy Src to *this leaf
-        std::copy(Src.block_->begin(), Src.block_->begin() + SrcSize, block_->begin());
-
+        std::copy(src.m_block->begin(), src.m_block->begin() + src_size, m_block->begin());
 
-        std::vector<iterator_base*> Iterators2Fix;
-        btree_->iterator_map_.find(my_bid(), 0, size(), Iterators2Fix);
-        typename std::vector<iterator_base*>::iterator it2fix = Iterators2Fix.begin();
-        for ( ; it2fix != Iterators2Fix.end(); ++it2fix)
+        std::vector<iterator_base*> iterators2fix;
+        m_btree->m_iterator_map.find(my_bid(), 0, size(), iterators2fix);
+        typename std::vector<iterator_base*>::iterator it2fix = iterators2fix.begin();
+        for ( ; it2fix != iterators2fix.end(); ++it2fix)
         {
             STXXL_VERBOSE2("btree::normal_leaf updating iterator " << (*it2fix) <<
-                           " (pos+" << SrcSize << ")");
-            btree_->iterator_map_.unregister_iterator(**it2fix);
-            ((*it2fix)->pos) += SrcSize;                           // fixing iterators
-            btree_->iterator_map_.register_iterator(**it2fix);
+                           " (pos+" << src_size << ")");
+            m_btree->m_iterator_map.unregister_iterator(**it2fix);
+            ((*it2fix)->pos) += src_size;                           // fixing iterators
+            m_btree->m_iterator_map.register_iterator(**it2fix);
         }
 
-        Iterators2Fix.clear();
-        btree_->iterator_map_.find(Src.my_bid(), 0, SrcSize, Iterators2Fix);
-        it2fix = Iterators2Fix.begin();
-        for ( ; it2fix != Iterators2Fix.end(); ++it2fix)
+        iterators2fix.clear();
+        m_btree->m_iterator_map.find(src.my_bid(), 0, src_size, iterators2fix);
+        for (it2fix = iterators2fix.begin(); it2fix != iterators2fix.end(); ++it2fix)
         {
             STXXL_VERBOSE2("btree::normal_leaf updating iterator " << (*it2fix) <<
                            " (bid=" << my_bid() << ")");
-            btree_->iterator_map_.unregister_iterator(**it2fix);
+            m_btree->m_iterator_map.unregister_iterator(**it2fix);
             ((*it2fix)->bid) = my_bid();                             // fixing iterators
-            btree_->iterator_map_.register_iterator(**it2fix);
+            m_btree->m_iterator_map.register_iterator(**it2fix);
         }
 
-        block_->info.cur_size += SrcSize;
+        m_block->info.cur_size += src_size;
 
         // update links
-        pred() = Src.pred();
+        pred() = src.pred();
         if (pred().valid())
         {                         // update successor link
-            normal_leaf* NewPred = btree_->leaf_cache_.get_node(pred());
-            assert(NewPred);
-            NewPred->succ() = my_bid();
+            normal_leaf* new_pred = m_btree->m_leaf_cache.get_node(pred());
+            assert(new_pred);
+            new_pred->succ() = my_bid();
         }
     }
 
-    key_type balance(normal_leaf& Left)
+    key_type balance(normal_leaf& left)
     {
         STXXL_VERBOSE1("btree::normal_leaf Balancing leaves with bids " <<
-                       Left.my_bid() << " and " << my_bid());
-        const unsigned TotalSize = Left.size() + size();
-        unsigned newLeftSize = TotalSize / 2;
-        assert(newLeftSize <= Left.max_nelements());
-        assert(newLeftSize >= Left.min_nelements());
-        unsigned newRightSize = TotalSize - newLeftSize;
-        assert(newRightSize <= max_nelements());
-        assert(newRightSize >= min_nelements());
-
-        assert(vcmp_(Left.back(), front()) || size() == 0);
-
-        if (newLeftSize < Left.size())
+                       left.my_bid() << " and " << my_bid());
+        const unsigned total_size = left.size() + size();
+        unsigned new_left_size = total_size / 2;
+        assert(new_left_size <= left.max_nelements());
+        assert(new_left_size >= left.min_nelements());
+        unsigned new_right_size = total_size - new_left_size;
+        assert(new_right_size <= max_nelements());
+        assert(new_right_size >= min_nelements());
+
+        assert(m_vcmp(left.back(), front()) || size() == 0);
+
+        if (new_left_size < left.size())
         {
-            const unsigned nEl2Move = Left.size() - newLeftSize;                            // #elements to move from Left to *this
+            // #elements to move from left to *this
+            const unsigned nEl2Move = left.size() - new_left_size;
 
-            typename block_type::iterator cur = block_->begin() + size() - 1;
-            typename block_type::const_iterator begin = block_->begin();
+            typename block_type::iterator cur = m_block->begin() + size() - 1;
+            typename block_type::const_iterator begin = m_block->begin();
 
             for ( ; cur >= begin; --cur)
                 *(cur + nEl2Move) = *cur;
             // move elements to make space for Src elements
 
             // copy Left to *this leaf
-            std::copy(Left.block_->begin() + newLeftSize,
-                      Left.block_->begin() + Left.size(), block_->begin());
+            std::copy(left.m_block->begin() + new_left_size,
+                      left.m_block->begin() + left.size(), m_block->begin());
 
-            std::vector<iterator_base*> Iterators2Fix1;
-            std::vector<iterator_base*> Iterators2Fix2;
-            btree_->iterator_map_.find(my_bid(), 0, size(), Iterators2Fix1);
-            btree_->iterator_map_.find(Left.my_bid(), newLeftSize, Left.size(), Iterators2Fix2);
+            std::vector<iterator_base*> iterators2fix1;
+            std::vector<iterator_base*> iterators2fix2;
+            m_btree->m_iterator_map.find(my_bid(), 0, size(), iterators2fix1);
+            m_btree->m_iterator_map.find(left.my_bid(), new_left_size, left.size(), iterators2fix2);
 
-            typename std::vector<iterator_base*>::iterator it2fix = Iterators2Fix1.begin();
-            for ( ; it2fix != Iterators2Fix1.end(); ++it2fix)
+            typename std::vector<iterator_base*>::iterator it2fix = iterators2fix1.begin();
+            for ( ; it2fix != iterators2fix1.end(); ++it2fix)
             {
                 STXXL_VERBOSE2("btree::normal_leaf updating iterator " << (*it2fix) <<
                                " (pos+" << nEl2Move << ")");
-                btree_->iterator_map_.unregister_iterator(**it2fix);
+                m_btree->m_iterator_map.unregister_iterator(**it2fix);
                 ((*it2fix)->pos) += nEl2Move;                               // fixing iterators
-                btree_->iterator_map_.register_iterator(**it2fix);
+                m_btree->m_iterator_map.register_iterator(**it2fix);
             }
 
-
-            it2fix = Iterators2Fix2.begin();
-            for ( ; it2fix != Iterators2Fix2.end(); ++it2fix)
+            it2fix = iterators2fix2.begin();
+            for ( ; it2fix != iterators2fix2.end(); ++it2fix)
             {
                 STXXL_VERBOSE2("btree::normal_leaf updating iterator " << (*it2fix) <<
-                               " (pos-" << newLeftSize << " bid=" << my_bid() << ")");
-                btree_->iterator_map_.unregister_iterator(**it2fix);
+                               " (pos-" << new_left_size << " bid=" << my_bid() << ")");
+                m_btree->m_iterator_map.unregister_iterator(**it2fix);
                 ((*it2fix)->bid) = my_bid();                                     // fixing iterators
-                ((*it2fix)->pos) -= newLeftSize;                                 // fixing iterators
-                btree_->iterator_map_.register_iterator(**it2fix);
+                ((*it2fix)->pos) -= new_left_size;                               // fixing iterators
+                m_btree->m_iterator_map.register_iterator(**it2fix);
             }
         }
         else
         {
-            assert(newRightSize < size());
+            assert(new_right_size < size());
 
-            const unsigned nEl2Move = size() - newRightSize;                            // #elements to move from *this to Left
+            const unsigned nEl2Move = size() - new_right_size;                            // #elements to move from *this to Left
 
             // copy *this to Left
-            std::copy(block_->begin(),
-                      block_->begin() + nEl2Move, Left.block_->begin() + Left.size());
+            std::copy(m_block->begin(),
+                      m_block->begin() + nEl2Move, left.m_block->begin() + left.size());
             // move elements in *this
-            std::copy(block_->begin() + nEl2Move,
-                      block_->begin() + size(), block_->begin());
+            std::copy(m_block->begin() + nEl2Move,
+                      m_block->begin() + size(), m_block->begin());
 
-            std::vector<iterator_base*> Iterators2Fix1;
-            std::vector<iterator_base*> Iterators2Fix2;
-            btree_->iterator_map_.find(my_bid(), nEl2Move, size(), Iterators2Fix1);
-            btree_->iterator_map_.find(my_bid(), 0, nEl2Move - 1, Iterators2Fix2);
+            std::vector<iterator_base*> iterators2fix1;
+            std::vector<iterator_base*> iterators2fix2;
+            m_btree->m_iterator_map.find(my_bid(), nEl2Move, size(), iterators2fix1);
+            m_btree->m_iterator_map.find(my_bid(), 0, nEl2Move - 1, iterators2fix2);
 
-            typename std::vector<iterator_base*>::iterator it2fix = Iterators2Fix1.begin();
-            for ( ; it2fix != Iterators2Fix1.end(); ++it2fix)
+            typename std::vector<iterator_base*>::iterator it2fix = iterators2fix1.begin();
+            for ( ; it2fix != iterators2fix1.end(); ++it2fix)
             {
                 STXXL_VERBOSE2("btree::normal_leaf updating iterator " << (*it2fix) <<
                                " (pos-" << nEl2Move << ")");
-                btree_->iterator_map_.unregister_iterator(**it2fix);
+                m_btree->m_iterator_map.unregister_iterator(**it2fix);
                 ((*it2fix)->pos) -= nEl2Move;                                 // fixing iterators
-                btree_->iterator_map_.register_iterator(**it2fix);
+                m_btree->m_iterator_map.register_iterator(**it2fix);
             }
 
-
-            it2fix = Iterators2Fix2.begin();
-            for ( ; it2fix != Iterators2Fix2.end(); ++it2fix)
+            it2fix = iterators2fix2.begin();
+            for ( ; it2fix != iterators2fix2.end(); ++it2fix)
             {
                 STXXL_VERBOSE2("btree::normal_leaf updating iterator " << (*it2fix) <<
-                               " (pos+" << Left.size() << " bid=" << Left.my_bid() << ")");
-                btree_->iterator_map_.unregister_iterator(**it2fix);
-                ((*it2fix)->bid) = Left.my_bid();                                 // fixing iterators
-                ((*it2fix)->pos) += Left.size();                                  // fixing iterators
-                btree_->iterator_map_.register_iterator(**it2fix);
+                               " (pos+" << left.size() << " bid=" << left.my_bid() << ")");
+                m_btree->m_iterator_map.unregister_iterator(**it2fix);
+                ((*it2fix)->bid) = left.my_bid();                                 // fixing iterators
+                ((*it2fix)->pos) += left.size();                                  // fixing iterators
+                m_btree->m_iterator_map.register_iterator(**it2fix);
             }
         }
 
-        block_->info.cur_size = newRightSize;                             // update size
-        Left.block_->info.cur_size = newLeftSize;                         // update size
+        m_block->info.cur_size = new_right_size;                             // update size
+        left.m_block->info.cur_size = new_left_size;                         // update size
 
-        return Left.back().first;
+        return left.back().first;
     }
 
     void push_back(const value_type& x)
     {
         (*this)[size()] = x;
-        ++(block_->info.cur_size);
+        ++(m_block->info.cur_size);
     }
 };
 
 } // namespace btree
 
-
 STXXL_END_NAMESPACE
 
 #endif // !STXXL_CONTAINERS_BTREE_LEAF_HEADER
diff --git a/include/stxxl/bits/containers/btree/node.h b/include/stxxl/bits/containers/btree/node.h
index 7e8dd1b..13dec7b 100644
--- a/include/stxxl/bits/containers/btree/node.h
+++ b/include/stxxl/bits/containers/btree/node.h
@@ -16,7 +16,6 @@
 #include <stxxl/bits/containers/btree/iterator.h>
 #include <stxxl/bits/containers/btree/node_cache.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace btree {
@@ -24,34 +23,33 @@ namespace btree {
 template <class NodeType, class BTreeType>
 class node_cache;
 
-template <class KeyType_, class KeyCmp_, unsigned RawSize_, class BTreeType>
+template <class KeyType, class KeyCmp, unsigned RawSize, class BTreeType>
 class normal_node : private noncopyable
 {
 public:
-    typedef normal_node<KeyType_, KeyCmp_, RawSize_, BTreeType> SelfType;
+    typedef normal_node<KeyType, KeyCmp, RawSize, BTreeType> self_type;
 
-    friend class node_cache<SelfType, BTreeType>;
+    friend class node_cache<self_type, BTreeType>;
 
-    typedef KeyType_ key_type;
-    typedef KeyCmp_ key_compare;
+    typedef KeyType key_type;
+    typedef KeyCmp key_compare;
 
     enum {
-        raw_size = RawSize_
+        raw_size = RawSize
     };
     typedef BID<raw_size> bid_type;
     typedef bid_type node_bid_type;
-    typedef SelfType node_type;
+    typedef self_type node_type;
     typedef std::pair<key_type, bid_type> value_type;
     typedef value_type& reference;
     typedef const value_type& const_reference;
 
-
-    struct InfoType
+    struct metainfo_type
     {
         bid_type me;
         unsigned cur_size;
     };
-    typedef typed_block<raw_size, value_type, 0, InfoType> block_type;
+    typedef typed_block<raw_size, value_type, 0, metainfo_type> block_type;
 
     enum {
         nelements = block_type::size - 1,
@@ -85,11 +83,10 @@ private:
         }
     };
 
-    block_type* block_;
-    btree_type* btree_;
-    key_compare cmp_;
-    value_compare vcmp_;
-
+    block_type* m_block;
+    btree_type* m_btree;
+    key_compare m_cmp;
+    value_compare m_vcmp;
 
     std::pair<key_type, bid_type> insert(const std::pair<key_type, bid_type>& splitter,
                                          const block_iterator& place2insert)
@@ -97,43 +94,42 @@ private:
         std::pair<key_type, bid_type> result(key_compare::max_value(), bid_type());
 
         // splitter != *place2insert
-        assert(vcmp_(*place2insert, splitter) || vcmp_(splitter, *place2insert));
+        assert(m_vcmp(*place2insert, splitter) || m_vcmp(splitter, *place2insert));
 
-        block_iterator cur = block_->begin() + size() - 1;
+        block_iterator cur = m_block->begin() + size() - 1;
         for ( ; cur >= place2insert; --cur)
             *(cur + 1) = *cur;
         // copy elements to make space for the new element
 
         *place2insert = splitter;               // insert
 
-        ++(block_->info.cur_size);
+        ++(m_block->info.cur_size);
 
         if (size() > max_nelements())           // overflow! need to split
         {
             STXXL_VERBOSE1("btree::normal_node::insert overflow happened, splitting");
 
-            bid_type NewBid;
-            btree_->node_cache_.get_new_node(NewBid);                             // new (left) node
-            normal_node* NewNode = btree_->node_cache_.get_node(NewBid, true);
-            assert(NewNode);
+            bid_type new_bid;
+            m_btree->m_node_cache.get_new_node(new_bid);                             // new (left) node
+            normal_node* new_node = m_btree->m_node_cache.get_node(new_bid, true);
+            assert(new_node);
 
             const unsigned end_of_smaller_part = size() / 2;
 
-            result.first = ((*block_)[end_of_smaller_part - 1]).first;
-            result.second = NewBid;
-
+            result.first = ((*m_block)[end_of_smaller_part - 1]).first;
+            result.second = new_bid;
 
             const unsigned old_size = size();
             // copy the smaller part
-            std::copy(block_->begin(), block_->begin() + end_of_smaller_part, NewNode->block_->begin());
-            NewNode->block_->info.cur_size = end_of_smaller_part;
+            std::copy(m_block->begin(), m_block->begin() + end_of_smaller_part, new_node->m_block->begin());
+            new_node->m_block->info.cur_size = end_of_smaller_part;
             // copy the larger part
-            std::copy(block_->begin() + end_of_smaller_part,
-                      block_->begin() + old_size, block_->begin());
-            block_->info.cur_size = old_size - end_of_smaller_part;
-            assert(size() + NewNode->size() == old_size);
+            std::copy(m_block->begin() + end_of_smaller_part,
+                      m_block->begin() + old_size, m_block->begin());
+            m_block->info.cur_size = old_size - end_of_smaller_part;
+            assert(size() + new_node->size() == old_size);
 
-            btree_->node_cache_.unfix_node(NewBid);
+            m_btree->m_node_cache.unfix_node(new_bid);
 
             STXXL_VERBOSE1("btree::normal_node split leaf " << this
                                                             << " splitter: " << result.first);
@@ -143,15 +139,15 @@ private:
     }
 
     template <class CacheType>
-    void fuse_or_balance(block_iterator UIt, CacheType& cache_)
+    void fuse_or_balance(block_iterator UIt, CacheType& cache)
     {
         typedef typename CacheType::node_type local_node_type;
         typedef typename local_node_type::bid_type local_bid_type;
 
         block_iterator leftIt, rightIt;
-        if (UIt == (block_->begin() + size() - 1))                      // UIt is the last entry in the root
+        if (UIt == (m_block->begin() + size() - 1))                      // UIt is the last entry in the root
         {
-            assert(UIt != block_->begin());
+            assert(UIt != m_block->begin());
             rightIt = UIt;
             leftIt = --UIt;
         }
@@ -159,81 +155,86 @@ private:
         {
             leftIt = UIt;
             rightIt = ++UIt;
-            assert(rightIt != (block_->begin() + size()));
+            assert(rightIt != (m_block->begin() + size()));
         }
 
         // now fuse or balance nodes pointed by leftIt and rightIt
-        local_bid_type LeftBid = (local_bid_type)leftIt->second;
-        local_bid_type RightBid = (local_bid_type)rightIt->second;
-        local_node_type* LeftNode = cache_.get_node(LeftBid, true);
-        local_node_type* RightNode = cache_.get_node(RightBid, true);
+        local_bid_type left_bid = (local_bid_type)leftIt->second;
+        local_bid_type right_bid = (local_bid_type)rightIt->second;
+        local_node_type* left_node = cache.get_node(left_bid, true);
+        local_node_type* right_node = cache.get_node(right_bid, true);
 
-        const unsigned TotalSize = LeftNode->size() + RightNode->size();
-        if (TotalSize <= RightNode->max_nelements())
+        const unsigned total_size = left_node->size() + right_node->size();
+        if (total_size <= right_node->max_nelements())
         {
-            // fuse
-            RightNode->fuse(*LeftNode);                                         // add the content of LeftNode to RightNode
+            // --- fuse ---
+
+            // add the content of left_node to right_node
+            right_node->fuse(*left_node);
 
-            cache_.unfix_node(RightBid);
-            cache_.delete_node(LeftBid);                                        // 'delete_node' unfixes LeftBid also
+            cache.unfix_node(right_bid);
+            // 'delete_node' unfixes left-bid also
+            cache.delete_node(left_bid);
 
-            std::copy(leftIt + 1, block_->begin() + size(), leftIt);            // delete left BID from the root
-            --(block_->info.cur_size);
+            // delete left BID from the root
+            std::copy(leftIt + 1, m_block->begin() + size(), leftIt);
+            --(m_block->info.cur_size);
         }
         else
         {
-            // balance
-
-            key_type NewSplitter = RightNode->balance(*LeftNode);
+            // --- balance ---
 
+            key_type new_splitter = right_node->balance(*left_node);
 
-            leftIt->first = NewSplitter;                             // change key
-            assert(vcmp_(*leftIt, *rightIt));
+            // change key
+            leftIt->first = new_splitter;
+            assert(m_vcmp(*leftIt, *rightIt));
 
-            cache_.unfix_node(LeftBid);
-            cache_.unfix_node(RightBid);
+            cache.unfix_node(left_bid);
+            cache.unfix_node(right_bid);
         }
     }
 
 public:
     virtual ~normal_node()
     {
-        delete block_;
+        delete m_block;
     }
 
-    normal_node(btree_type* btree__,
-                key_compare cmp) :
-        block_(new block_type),
-        btree_(btree__),
-        cmp_(cmp),
-        vcmp_(cmp)
+    normal_node(btree_type* btree,
+                key_compare cmp)
+        : m_block(new block_type),
+          m_btree(btree),
+          m_cmp(cmp),
+          m_vcmp(cmp)
     {
         assert(min_nelements() >= 2);
         assert(2 * min_nelements() - 1 <= max_nelements());
         assert(max_nelements() <= nelements);
-        assert(unsigned(block_type::size) >= nelements + 1);                       // extra space for an overflow
+        // extra space for an overflow
+        assert(unsigned(block_type::size) >= nelements + 1);
     }
 
     block_type & block()
     {
-        return *block_;
+        return *m_block;
     }
 
-    bool overflows() const { return block_->info.cur_size > max_nelements(); }
-    bool underflows() const { return block_->info.cur_size < min_nelements(); }
+    bool overflows() const { return m_block->info.cur_size > max_nelements(); }
+    bool underflows() const { return m_block->info.cur_size < min_nelements(); }
 
-    unsigned max_nelements() const { return max_size; }
-    unsigned min_nelements() const { return min_size; }
+    static unsigned max_nelements() { return max_size; }
+    static unsigned min_nelements() { return min_size; }
 
     /*
        template <class InputIterator>
        normal_node(InputIterator begin_, InputIterator end_,
-            btree_type * btree__,
+            btree_type * btree,
             key_compare cmp):
-            block_(new block_type),
-            btree_(btree__),
-            cmp_(cmp),
-            vcmp_(cmp)
+            m_block(new block_type),
+            m_btree(btree),
+            m_cmp(cmp),
+            m_vcmp(cmp)
        {
             assert(min_nelements() >=2);
             assert(2*min_nelements() - 1 <= max_nelements());
@@ -244,30 +245,30 @@ public:
             assert(new_size <= max_nelements());
             assert(new_size >= min_nelements());
 
-            std::copy(begin_,end_,block_->begin());
-            assert(stxxl::is_sorted(block_->begin(),block_->begin() + new_size, vcmp_));
-            block_->info.cur_size = new_size;
+            std::copy(begin_,end_,m_block->begin());
+            assert(stxxl::is_sorted(m_block->begin(),m_block->begin() + new_size, m_vcmp));
+            m_block->info.cur_size = new_size;
        }*/
 
     unsigned size() const
     {
-        return block_->info.cur_size;
+        return m_block->info.cur_size;
     }
 
     bid_type my_bid() const
     {
-        return block_->info.me;
+        return m_block->info.me;
     }
 
     void save()
     {
-        request_ptr req = block_->write(my_bid());
+        request_ptr req = m_block->write(my_bid());
         req->wait();
     }
 
     request_ptr load(const bid_type& bid)
     {
-        request_ptr req = block_->read(bid);
+        request_ptr req = m_block->read(bid);
         req->wait();
         assert(bid == my_bid());
         return req;
@@ -275,100 +276,97 @@ public:
 
     request_ptr prefetch(const bid_type& bid)
     {
-        return block_->read(bid);
+        return m_block->read(bid);
     }
 
     void init(const bid_type& my_bid_)
     {
-        block_->info.me = my_bid_;
-        block_->info.cur_size = 0;
+        m_block->info.me = my_bid_;
+        m_block->info.cur_size = 0;
     }
 
     reference operator [] (int i)
     {
-        return (*block_)[i];
+        return (*m_block)[i];
     }
 
     const_reference operator [] (int i) const
     {
-        return (*block_)[i];
+        return (*m_block)[i];
     }
 
     reference back()
     {
-        return (*block_)[size() - 1];
+        return (*m_block)[size() - 1];
     }
 
     reference front()
     {
-        return *(block_->begin());
+        return *(m_block->begin());
     }
 
     const_reference back() const
     {
-        return (*block_)[size() - 1];
+        return (*m_block)[size() - 1];
     }
 
     const_reference front() const
     {
-        return *(block_->begin());
+        return *(m_block->begin());
     }
 
-
-    std::pair<iterator, bool> insert(
-        const btree_value_type& x,
-        unsigned height,
-        std::pair<key_type, bid_type>& splitter)
+    std::pair<iterator, bool>
+    insert(const btree_value_type& x, unsigned height,
+           std::pair<key_type, bid_type>& splitter)
     {
         assert(size() <= max_nelements());
         splitter.first = key_compare::max_value();
 
-        value_type Key2Search(x.first, bid_type());
+        value_type key2search(x.first, bid_type());
         block_iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
-        bid_type found_bid = it->second;
-        STXXL_UNUSED(found_bid);
+        //bid_type found_bid = it->second;
 
         if (height == 2)                        // found_bid points to a leaf
         {
             STXXL_VERBOSE1("btree::normal_node Inserting new value into a leaf");
-            leaf_type* Leaf = btree_->leaf_cache_.get_node((leaf_bid_type)it->second, true);
-            assert(Leaf);
-            std::pair<key_type, leaf_bid_type> BotSplitter;
-            std::pair<iterator, bool> result = Leaf->insert(x, BotSplitter);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)it->second);
+            leaf_type* leaf = m_btree->m_leaf_cache.get_node((leaf_bid_type)it->second, true);
+            assert(leaf);
+            std::pair<key_type, leaf_bid_type> bot_splitter;
+            std::pair<iterator, bool> result = leaf->insert(x, bot_splitter);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)it->second);
             //if(key_compare::max_value() == BotSplitter.first)
-            if (!(cmp_(key_compare::max_value(), BotSplitter.first) ||
-                  cmp_(BotSplitter.first, key_compare::max_value())))
+            if (!(m_cmp(key_compare::max_value(), bot_splitter.first) ||
+                  m_cmp(bot_splitter.first, key_compare::max_value())))
                 return result;
             // no overflow/splitting happened
 
             STXXL_VERBOSE1("btree::normal_node Inserting new value in *this");
 
-            splitter = insert(std::make_pair(BotSplitter.first, bid_type(BotSplitter.second)), it);
+            splitter = insert(std::make_pair(bot_splitter.first, bid_type(bot_splitter.second)), it);
 
             return result;
         }
         else
         {                               // found_bid points to a node
             STXXL_VERBOSE1("btree::normal_node Inserting new value into a node");
-            node_type* Node = btree_->node_cache_.get_node((node_bid_type)it->second, true);
-            assert(Node);
-            std::pair<key_type, node_bid_type> BotSplitter;
-            std::pair<iterator, bool> result = Node->insert(x, height - 1, BotSplitter);
-            btree_->node_cache_.unfix_node((node_bid_type)it->second);
+            node_type* node = m_btree->m_node_cache.get_node((node_bid_type)it->second, true);
+            assert(node);
+            std::pair<key_type, node_bid_type> bot_splitter;
+            std::pair<iterator, bool> result = node->insert(x, height - 1, bot_splitter);
+            m_btree->m_node_cache.unfix_node((node_bid_type)it->second);
             //if(key_compare::max_value() == BotSplitter.first)
-            if (!(cmp_(key_compare::max_value(), BotSplitter.first) ||
-                  cmp_(BotSplitter.first, key_compare::max_value())))
+            if (!(m_cmp(key_compare::max_value(), bot_splitter.first) ||
+                  m_cmp(bot_splitter.first, key_compare::max_value())))
                 return result;
             // no overflow/splitting happened
 
             STXXL_VERBOSE1("btree::normal_node Inserting new value in *this");
 
-            splitter = insert(BotSplitter, it);
+            splitter = insert(bot_splitter, it);
 
             return result;
         }
@@ -376,314 +374,315 @@ public:
 
     iterator begin(unsigned height)
     {
-        bid_type FirstBid = block_->begin()->second;
+        bid_type first_bid = m_block->begin()->second;
         if (height == 2)                        // FirstBid points to a leaf
         {
             assert(size() > 1);
             STXXL_VERBOSE1("btree::node retrieveing begin() from the first leaf");
-            leaf_type* Leaf = btree_->leaf_cache_.get_node((leaf_bid_type)FirstBid);
-            assert(Leaf);
-            return Leaf->begin();
+            leaf_type* leaf = m_btree->m_leaf_cache.get_node((leaf_bid_type)first_bid);
+            assert(leaf);
+            return leaf->begin();
         }
         else
         {                         // FirstBid points to a node
             STXXL_VERBOSE1("btree: retrieveing begin() from the first node");
-            node_type* Node = btree_->node_cache_.get_node((node_bid_type)FirstBid, true);
-            assert(Node);
-            iterator result = Node->begin(height - 1);
-            btree_->node_cache_.unfix_node((node_bid_type)FirstBid);
+            node_type* node = m_btree->m_node_cache.get_node((node_bid_type)first_bid, true);
+            assert(node);
+            iterator result = node->begin(height - 1);
+            m_btree->m_node_cache.unfix_node((node_bid_type)first_bid);
             return result;
         }
     }
 
     const_iterator begin(unsigned height) const
     {
-        bid_type FirstBid = block_->begin()->second;
+        bid_type FirstBid = m_block->begin()->second;
         if (height == 2)                        // FirstBid points to a leaf
         {
             assert(size() > 1);
             STXXL_VERBOSE1("btree::node retrieveing begin() from the first leaf");
-            leaf_type const* Leaf = btree_->leaf_cache_.get_const_node((leaf_bid_type)FirstBid);
-            assert(Leaf);
-            return Leaf->begin();
+            const leaf_type* leaf = m_btree->m_leaf_cache.get_const_node((leaf_bid_type)FirstBid);
+            assert(leaf);
+            return leaf->begin();
         }
         else
         {                         // FirstBid points to a node
             STXXL_VERBOSE1("btree: retrieveing begin() from the first node");
-            node_type const* Node = btree_->node_cache_.get_const_node((node_bid_type)FirstBid, true);
-            assert(Node);
-            const_iterator result = Node->begin(height - 1);
-            btree_->node_cache_.unfix_node((node_bid_type)FirstBid);
+            const node_type* node = m_btree->m_node_cache.get_const_node((node_bid_type)FirstBid, true);
+            assert(node);
+            const_iterator result = node->begin(height - 1);
+            m_btree->m_node_cache.unfix_node((node_bid_type)FirstBid);
             return result;
         }
     }
 
     iterator find(const key_type& k, unsigned height)
     {
-        value_type Key2Search(k, bid_type());
+        value_type key2search(k, bid_type());
 
         block_iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
         bid_type found_bid = it->second;
 
         if (height == 2)                // found_bid points to a leaf
         {
             STXXL_VERBOSE1("Searching in a leaf");
-            leaf_type* Leaf = btree_->leaf_cache_.get_node((leaf_bid_type)found_bid, true);
-            assert(Leaf);
-            iterator result = Leaf->find(k);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)found_bid);
+            leaf_type* leaf = m_btree->m_leaf_cache.get_node((leaf_bid_type)found_bid, true);
+            assert(leaf);
+            iterator result = leaf->find(k);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)found_bid);
 
             return result;
         }
 
         // found_bid points to a node
         STXXL_VERBOSE1("Searching in a node");
-        node_type* Node = btree_->node_cache_.get_node((node_bid_type)found_bid, true);
-        assert(Node);
-        iterator result = Node->find(k, height - 1);
-        btree_->node_cache_.unfix_node((node_bid_type)found_bid);
+        node_type* node = m_btree->m_node_cache.get_node((node_bid_type)found_bid, true);
+        assert(node);
+        iterator result = node->find(k, height - 1);
+        m_btree->m_node_cache.unfix_node((node_bid_type)found_bid);
 
         return result;
     }
 
     const_iterator find(const key_type& k, unsigned height) const
     {
-        value_type Key2Search(k, bid_type());
+        value_type key2search(k, bid_type());
 
         block_iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
         bid_type found_bid = it->second;
 
         if (height == 2)                // found_bid points to a leaf
         {
             STXXL_VERBOSE1("Searching in a leaf");
-            leaf_type const* Leaf = btree_->leaf_cache_.get_const_node((leaf_bid_type)found_bid, true);
-            assert(Leaf);
-            const_iterator result = Leaf->find(k);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)found_bid);
+            const leaf_type* leaf = m_btree->m_leaf_cache.get_const_node((leaf_bid_type)found_bid, true);
+            assert(leaf);
+            const_iterator result = leaf->find(k);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)found_bid);
 
             return result;
         }
 
         // found_bid points to a node
         STXXL_VERBOSE1("Searching in a node");
-        node_type const* Node = btree_->node_cache_.get_const_node((node_bid_type)found_bid, true);
-        assert(Node);
-        const_iterator result = Node->find(k, height - 1);
-        btree_->node_cache_.unfix_node((node_bid_type)found_bid);
+        const node_type* node = m_btree->m_node_cache.get_const_node((node_bid_type)found_bid, true);
+        assert(node);
+        const_iterator result = node->find(k, height - 1);
+        m_btree->m_node_cache.unfix_node((node_bid_type)found_bid);
 
         return result;
     }
 
     iterator lower_bound(const key_type& k, unsigned height)
     {
-        value_type Key2Search(k, bid_type());
-        assert(!vcmp_(back(), Key2Search));
+        value_type key2search(k, bid_type());
+        assert(!m_vcmp(back(), key2search));
         block_iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
         bid_type found_bid = it->second;
 
         if (height == 2)                // found_bid points to a leaf
         {
             STXXL_VERBOSE1("Searching lower bound in a leaf");
-            leaf_type* Leaf = btree_->leaf_cache_.get_node((leaf_bid_type)found_bid, true);
-            assert(Leaf);
-            iterator result = Leaf->lower_bound(k);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)found_bid);
+            leaf_type* leaf = m_btree->m_leaf_cache.get_node((leaf_bid_type)found_bid, true);
+            assert(leaf);
+            iterator result = leaf->lower_bound(k);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)found_bid);
 
             return result;
         }
 
         // found_bid points to a node
         STXXL_VERBOSE1("Searching lower bound in a node");
-        node_type* Node = btree_->node_cache_.get_node((node_bid_type)found_bid, true);
-        assert(Node);
-        iterator result = Node->lower_bound(k, height - 1);
-        btree_->node_cache_.unfix_node((node_bid_type)found_bid);
+        node_type* node = m_btree->m_node_cache.get_node((node_bid_type)found_bid, true);
+        assert(node);
+        iterator result = node->lower_bound(k, height - 1);
+        m_btree->m_node_cache.unfix_node((node_bid_type)found_bid);
 
         return result;
     }
 
     const_iterator lower_bound(const key_type& k, unsigned height) const
     {
-        value_type Key2Search(k, bid_type());
-        assert(!vcmp_(back(), Key2Search));
+        value_type key2search(k, bid_type());
+        assert(!m_vcmp(back(), key2search));
         block_iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
         bid_type found_bid = it->second;
 
         if (height == 2)                // found_bid points to a leaf
         {
             STXXL_VERBOSE1("Searching lower bound in a leaf");
-            leaf_type const* Leaf = btree_->leaf_cache_.get_const_node((leaf_bid_type)found_bid, true);
-            assert(Leaf);
-            const_iterator result = Leaf->lower_bound(k);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)found_bid);
+            const leaf_type* leaf = m_btree->m_leaf_cache.get_const_node((leaf_bid_type)found_bid, true);
+            assert(leaf);
+            const_iterator result = leaf->lower_bound(k);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)found_bid);
 
             return result;
         }
 
         // found_bid points to a node
         STXXL_VERBOSE1("Searching lower bound in a node");
-        node_type const* Node = btree_->node_cache_.get_const_node((node_bid_type)found_bid, true);
-        assert(Node);
-        const_iterator result = Node->lower_bound(k, height - 1);
-        btree_->node_cache_.unfix_node((node_bid_type)found_bid);
+        const node_type* node = m_btree->m_node_cache.get_const_node((node_bid_type)found_bid, true);
+        assert(node);
+        const_iterator result = node->lower_bound(k, height - 1);
+        m_btree->m_node_cache.unfix_node((node_bid_type)found_bid);
 
         return result;
     }
 
     iterator upper_bound(const key_type& k, unsigned height)
     {
-        value_type Key2Search(k, bid_type());
-        assert(vcmp_(Key2Search, back()));
+        value_type key2search(k, bid_type());
+        assert(m_vcmp(key2search, back()));
         block_iterator it =
-            std::upper_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::upper_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
         bid_type found_bid = it->second;
 
         if (height == 2)                // found_bid points to a leaf
         {
             STXXL_VERBOSE1("Searching upper bound in a leaf");
-            leaf_type* Leaf = btree_->leaf_cache_.get_node((leaf_bid_type)found_bid, true);
-            assert(Leaf);
-            iterator result = Leaf->upper_bound(k);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)found_bid);
+            leaf_type* leaf = m_btree->m_leaf_cache.get_node((leaf_bid_type)found_bid, true);
+            assert(leaf);
+            iterator result = leaf->upper_bound(k);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)found_bid);
 
             return result;
         }
 
         // found_bid points to a node
         STXXL_VERBOSE1("Searching upper bound in a node");
-        node_type* Node = btree_->node_cache_.get_node((node_bid_type)found_bid, true);
-        assert(Node);
-        iterator result = Node->upper_bound(k, height - 1);
-        btree_->node_cache_.unfix_node((node_bid_type)found_bid);
+        node_type* node = m_btree->m_node_cache.get_node((node_bid_type)found_bid, true);
+        assert(node);
+        iterator result = node->upper_bound(k, height - 1);
+        m_btree->m_node_cache.unfix_node((node_bid_type)found_bid);
 
         return result;
     }
 
     const_iterator upper_bound(const key_type& k, unsigned height) const
     {
-        value_type Key2Search(k, bid_type());
-        assert(vcmp_(Key2Search, back()));
+        value_type key2search(k, bid_type());
+        assert(m_vcmp(key2search, back()));
         block_iterator it =
-            std::upper_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::upper_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
         bid_type found_bid = it->second;
 
         if (height == 2)                // found_bid points to a leaf
         {
             STXXL_VERBOSE1("Searching upper bound in a leaf");
-            leaf_type const* Leaf = btree_->leaf_cache_.get_const_node((leaf_bid_type)found_bid, true);
-            assert(Leaf);
-            const_iterator result = Leaf->upper_bound(k);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)found_bid);
+            const leaf_type* leaf = m_btree->m_leaf_cache.get_const_node((leaf_bid_type)found_bid, true);
+            assert(leaf);
+            const_iterator result = leaf->upper_bound(k);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)found_bid);
 
             return result;
         }
 
         // found_bid points to a node
         STXXL_VERBOSE1("Searching upper bound in a node");
-        node_type const* Node = btree_->node_cache_.get_const_node((node_bid_type)found_bid, true);
-        assert(Node);
-        const_iterator result = Node->upper_bound(k, height - 1);
-        btree_->node_cache_.unfix_node((node_bid_type)found_bid);
+        const node_type* node = m_btree->m_node_cache.get_const_node((node_bid_type)found_bid, true);
+        assert(node);
+        const_iterator result = node->upper_bound(k, height - 1);
+        m_btree->m_node_cache.unfix_node((node_bid_type)found_bid);
 
         return result;
     }
 
-    void fuse(const normal_node& Src)
+    void fuse(const normal_node& src)
     {
-        assert(vcmp_(Src.back(), front()));
-        const unsigned SrcSize = Src.size();
+        assert(m_vcmp(src.back(), front()));
+        const unsigned src_size = src.size();
 
-        block_iterator cur = block_->begin() + size() - 1;
-        block_const_iterator begin = block_->begin();
+        block_iterator cur = m_block->begin() + size() - 1;
+        block_const_iterator begin = m_block->begin();
 
         for ( ; cur >= begin; --cur)
-            *(cur + SrcSize) = *cur;
+            *(cur + src_size) = *cur;
         // move elements to make space for Src elements
 
         // copy Src to *this leaf
-        std::copy(Src.block_->begin(), Src.block_->begin() + SrcSize, block_->begin());
+        std::copy(src.m_block->begin(), src.m_block->begin() + src_size, m_block->begin());
 
-        block_->info.cur_size += SrcSize;
+        m_block->info.cur_size += src_size;
     }
 
-
-    key_type balance(normal_node& Left)
+    key_type balance(normal_node& left, bool check_constraints = true)
     {
-        const unsigned TotalSize = Left.size() + size();
-        unsigned newLeftSize = TotalSize / 2;
-        assert(newLeftSize <= Left.max_nelements());
-        assert(newLeftSize >= Left.min_nelements());
-        unsigned newRightSize = TotalSize - newLeftSize;
-        assert(newRightSize <= max_nelements());
-        assert(newRightSize >= min_nelements());
+        const unsigned total_size = left.size() + size();
+        unsigned new_left_size = total_size / 2;
+        STXXL_ASSERT(!check_constraints || new_left_size <= left.max_nelements());
+        STXXL_ASSERT(!check_constraints || new_left_size >= left.min_nelements());
+        unsigned new_right_size = total_size - new_left_size;
+        STXXL_ASSERT(!check_constraints || new_right_size <= max_nelements());
+        STXXL_ASSERT(!check_constraints || new_right_size >= min_nelements());
 
-        assert(vcmp_(Left.back(), front()) || size() == 0);
+        assert(m_vcmp(left.back(), front()) || size() == 0);
 
-        if (newLeftSize < Left.size())
+        if (new_left_size < left.size())
         {
-            const unsigned nEl2Move = Left.size() - newLeftSize;                            // #elements to move from Left to *this
+            // #elements to move from left to *this
+            const unsigned nEl2Move = left.size() - new_left_size;
 
-            block_iterator cur = block_->begin() + size() - 1;
-            block_const_iterator begin = block_->begin();
+            block_iterator cur = m_block->begin() + size() - 1;
+            block_const_iterator begin = m_block->begin();
 
             for ( ; cur >= begin; --cur)
                 *(cur + nEl2Move) = *cur;
             // move elements to make space for Src elements
 
-            // copy Left to *this leaf
-            std::copy(Left.block_->begin() + newLeftSize,
-                      Left.block_->begin() + Left.size(), block_->begin());
+            // copy left to *this leaf
+            std::copy(left.m_block->begin() + new_left_size,
+                      left.m_block->begin() + left.size(), m_block->begin());
         }
         else
         {
-            assert(newRightSize < size());
+            assert(new_right_size < size());
 
-            const unsigned nEl2Move = size() - newRightSize;                            // #elements to move from *this to Left
+            // #elements to move from *this to left
+            const unsigned nEl2Move = size() - new_right_size;
 
-            // copy *this to Left
-            std::copy(block_->begin(),
-                      block_->begin() + nEl2Move, Left.block_->begin() + Left.size());
+            // copy *this to left
+            std::copy(m_block->begin(),
+                      m_block->begin() + nEl2Move, left.m_block->begin() + left.size());
             // move elements in *this
-            std::copy(block_->begin() + nEl2Move,
-                      block_->begin() + size(), block_->begin());
+            std::copy(m_block->begin() + nEl2Move,
+                      m_block->begin() + size(), m_block->begin());
         }
 
-        block_->info.cur_size = newRightSize;                             // update size
-        Left.block_->info.cur_size = newLeftSize;                         // update size
+        m_block->info.cur_size = new_right_size;                           // update size
+        left.m_block->info.cur_size = new_left_size;                       // update size
 
-        return Left.back().first;
+        return left.back().first;
     }
 
     size_type erase(const key_type& k, unsigned height)
     {
-        value_type Key2Search(k, bid_type());
+        value_type key2search(k, bid_type());
 
         block_iterator it =
-            std::lower_bound(block_->begin(), block_->begin() + size(), Key2Search, vcmp_);
+            std::lower_bound(m_block->begin(), m_block->begin() + size(), key2search, m_vcmp);
 
-        assert(it != (block_->begin() + size()));
+        assert(it != (m_block->begin() + size()));
 
         bid_type found_bid = it->second;
 
@@ -692,32 +691,32 @@ public:
         if (height == 2)                        // 'found_bid' points to a leaf
         {
             STXXL_VERBOSE1("btree::normal_node Deleting key from a leaf");
-            leaf_type* Leaf = btree_->leaf_cache_.get_node((leaf_bid_type)found_bid, true);
-            assert(Leaf);
-            size_type result = Leaf->erase(k);
-            btree_->leaf_cache_.unfix_node((leaf_bid_type)it->second);
-            if (!Leaf->underflows())
+            leaf_type* leaf = m_btree->m_leaf_cache.get_node((leaf_bid_type)found_bid, true);
+            assert(leaf);
+            size_type result = leaf->erase(k);
+            m_btree->m_leaf_cache.unfix_node((leaf_bid_type)it->second);
+            if (!leaf->underflows())
                 return result;
             // no underflow or root has a special degree 1 (too few elements)
 
             STXXL_VERBOSE1("btree::normal_node Fusing or rebalancing a leaf");
-            fuse_or_balance(it, btree_->leaf_cache_);
+            fuse_or_balance(it, m_btree->m_leaf_cache);
 
             return result;
         }
 
         // 'found_bid' points to a node
         STXXL_VERBOSE1("btree::normal_node Deleting key from a node");
-        node_type* Node = btree_->node_cache_.get_node((node_bid_type)found_bid, true);
-        assert(Node);
-        size_type result = Node->erase(k, height - 1);
-        btree_->node_cache_.unfix_node((node_bid_type)found_bid);
-        if (!Node->underflows())
+        node_type* node = m_btree->m_node_cache.get_node((node_bid_type)found_bid, true);
+        assert(node);
+        size_type result = node->erase(k, height - 1);
+        m_btree->m_node_cache.unfix_node((node_bid_type)found_bid);
+        if (!node->underflows())
             return result;
         // no underflow happened
 
         STXXL_VERBOSE1("btree::normal_node Fusing or rebalancing a node");
-        fuse_or_balance(it, btree_->node_cache_);
+        fuse_or_balance(it, m_btree->m_node_cache);
 
         return result;
     }
@@ -727,23 +726,23 @@ public:
         if (height == 2)
         {
             // we have children leaves here
-            block_const_iterator it = block().begin();
-            for ( ; it != block().begin() + size(); ++it)
+            for (block_const_iterator it = block().begin();
+                 it != block().begin() + size(); ++it)
             {
                 // delete from leaf cache and deallocate bid
-                btree_->leaf_cache_.delete_node((leaf_bid_type)it->second);
+                m_btree->m_leaf_cache.delete_node((leaf_bid_type)it->second);
             }
         }
         else
         {
-            block_const_iterator it = block().begin();
-            for ( ; it != block().begin() + size(); ++it)
+            for (block_const_iterator it = block().begin();
+                 it != block().begin() + size(); ++it)
             {
-                node_type* Node = btree_->node_cache_.get_node((node_bid_type)it->second);
-                assert(Node);
-                Node->deallocate_children(height - 1);
+                node_type* node = m_btree->m_node_cache.get_node((node_bid_type)it->second);
+                assert(node);
+                node->deallocate_children(height - 1);
                 // delete from node cache and deallocate bid
-                btree_->node_cache_.delete_node((node_bid_type)it->second);
+                m_btree->m_node_cache.delete_node((node_bid_type)it->second);
             }
         }
     }
@@ -751,14 +750,12 @@ public:
     void push_back(const value_type& x)
     {
         (*this)[size()] = x;
-        ++(block_->info.cur_size);
+        ++(m_block->info.cur_size);
     }
 };
 
 } // namespace btree
 
-
 STXXL_END_NAMESPACE
 
-
 #endif // !STXXL_CONTAINERS_BTREE_NODE_HEADER
diff --git a/include/stxxl/bits/containers/btree/node_cache.h b/include/stxxl/bits/containers/btree/node_cache.h
index b98997f..69cbfd3 100644
--- a/include/stxxl/bits/containers/btree/node_cache.h
+++ b/include/stxxl/bits/containers/btree/node_cache.h
@@ -21,9 +21,10 @@
 #include <stxxl/bits/containers/pager.h>
 #include <stxxl/bits/common/error_handling.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
+#define STXXL_BTREE_CACHE_VERBOSE STXXL_VERBOSE2
+
 // TODO:  speedup BID2node_ access using search result iterator in the methods
 
 namespace btree {
@@ -42,8 +43,8 @@ public:
     typedef stxxl::lru_pager<> pager_type;
 
 private:
-    btree_type* btree_;
-    key_compare comp_;
+    btree_type* m_btree;
+    key_compare m_cmp;
 
 /*
         struct bid_comp
@@ -60,7 +61,7 @@ private:
         size_t operator () (const bid_type& bid) const
         {
             size_t result =
-                longhash1(bid.offset + uint64(unsigned_type(bid.storage)));
+                longhash1(bid.offset + reinterpret_cast<uint64>(bid.storage));
             return result;
         }
 #if STXXL_MSVC
@@ -76,20 +77,20 @@ private:
 #endif
     };
 
-    std::vector<node_type*> nodes_;
-    std::vector<request_ptr> reqs_;
-    std::vector<bool> fixed_;
-    std::vector<bool> dirty_;
-    std::vector<int_type> free_nodes_;
+    std::vector<node_type*> m_nodes;
+    std::vector<request_ptr> m_reqs;
+    std::vector<bool> m_fixed;
+    std::vector<bool> m_dirty;
+    std::vector<int_type> m_free_nodes;
     typedef typename compat_hash_map<bid_type, int_type, bid_hash>::result hash_map_type;
 
     //typedef std::map<bid_type,int_type,bid_comp> BID2node_type;
-    typedef hash_map_type BID2node_type;
+    typedef hash_map_type bid2node_type;
 
-    BID2node_type BID2node_;
-    pager_type pager_;
-    block_manager* bm;
-    alloc_strategy_type alloc_strategy_;
+    bid2node_type m_bid2node;
+    pager_type m_pager;
+    block_manager* m_bm;
+    alloc_strategy_type m_alloc_strategy;
 
     int64 n_found;
     int64 n_not_found;
@@ -102,64 +103,63 @@ private:
     // changes btree pointer in all contained iterators
     void change_btree_pointers(btree_type* b)
     {
-        typename std::vector<node_type*>::const_iterator it = nodes_.begin();
-        for ( ; it != nodes_.end(); ++it)
+        for (typename std::vector<node_type*>::const_iterator it = m_nodes.begin();
+             it != m_nodes.end(); ++it)
         {
-            (*it)->btree_ = b;
+            (*it)->m_btree = b;
         }
     }
 
 public:
     node_cache(unsigned_type cache_size_in_bytes,
-               btree_type* btree__,
-               key_compare comp__
-               ) :
-        btree_(btree__),
-        comp_(comp__),
-        bm(block_manager::get_instance()),
-        n_found(0),
-        n_not_found(0),
-        n_created(0),
-        n_deleted(0),
-        n_read(0),
-        n_written(0),
-        n_clean_forced(0)
+               btree_type* btree,
+               key_compare cmp)
+        : m_btree(btree),
+          m_cmp(cmp),
+          m_bm(block_manager::get_instance()),
+          n_found(0),
+          n_not_found(0),
+          n_created(0),
+          n_deleted(0),
+          n_read(0),
+          n_written(0),
+          n_clean_forced(0)
     {
         const unsigned_type nnodes = cache_size_in_bytes / block_type::raw_size;
-        STXXL_VERBOSE1("btree::node_cache constructor nodes=" << nnodes);
+        STXXL_BTREE_CACHE_VERBOSE("btree::node_cache constructor nodes=" << nnodes);
         if (nnodes < 3)
         {
             STXXL_THROW2(std::runtime_error, "btree::node_cache::node_cache", "Too few memory for a node cache (<3)");
         }
-        nodes_.reserve(nnodes);
-        reqs_.resize(nnodes);
-        free_nodes_.reserve(nnodes);
-        fixed_.resize(nnodes, false);
-        dirty_.resize(nnodes, true);
+        m_nodes.reserve(nnodes);
+        m_reqs.resize(nnodes);
+        m_free_nodes.reserve(nnodes);
+        m_fixed.resize(nnodes, false);
+        m_dirty.resize(nnodes, true);
         for (unsigned_type i = 0; i < nnodes; ++i)
         {
-            nodes_.push_back(new node_type(btree_, comp_));
-            free_nodes_.push_back(i);
+            m_nodes.push_back(new node_type(m_btree, m_cmp));
+            m_free_nodes.push_back(i);
         }
 
         pager_type tmp_pager(nnodes);
-        std::swap(pager_, tmp_pager);
+        std::swap(m_pager, tmp_pager);
     }
 
     unsigned_type size() const
     {
-        return nodes_.size();
+        return m_nodes.size();
     }
 
     // returns the number of fixed pages
     unsigned_type nfixed() const
     {
-        typename BID2node_type::const_iterator i = BID2node_.begin();
-        typename BID2node_type::const_iterator end = BID2node_.end();
+        typename bid2node_type::const_iterator i = m_bid2node.begin();
+        typename bid2node_type::const_iterator end = m_bid2node.end();
         unsigned_type cnt = 0;
         for ( ; i != end; ++i)
         {
-            if (fixed_[(*i).second])
+            if (m_fixed[(*i).second])
                 ++cnt;
         }
         return cnt;
@@ -167,27 +167,27 @@ public:
 
     ~node_cache()
     {
-        STXXL_VERBOSE1("btree::node_cache destructor addr=" << this);
-        typename BID2node_type::const_iterator i = BID2node_.begin();
-        typename BID2node_type::const_iterator end = BID2node_.end();
+        STXXL_BTREE_CACHE_VERBOSE("btree::node_cache destructor addr=" << this);
+        typename bid2node_type::const_iterator i = m_bid2node.begin();
+        typename bid2node_type::const_iterator end = m_bid2node.end();
         for ( ; i != end; ++i)
         {
             const unsigned_type p = (*i).second;
-            if (reqs_[p].valid())
-                reqs_[p]->wait();
+            if (m_reqs[p].valid())
+                m_reqs[p]->wait();
 
-            if (dirty_[p])
-                nodes_[p]->save();
+            if (m_dirty[p])
+                m_nodes[p]->save();
         }
         for (unsigned_type i = 0; i < size(); ++i)
-            delete nodes_[i];
+            delete m_nodes[i];
     }
 
     node_type * get_new_node(bid_type& new_bid)
     {
         ++n_created;
 
-        if (free_nodes_.empty())
+        if (m_free_nodes.empty())
         {
             // need to kick a node
             int_type node2kick;
@@ -196,7 +196,7 @@ public:
             do
             {
                 ++i;
-                node2kick = pager_.kick();
+                node2kick = m_pager.kick();
                 if (i == max_tries)
                 {
                     STXXL_ERRMSG(
@@ -204,94 +204,88 @@ public:
                     STXXL_ERRMSG("Returning NULL node.");
                     return NULL;
                 }
-                pager_.hit(node2kick);
-            } while (fixed_[node2kick]);
-
+                m_pager.hit(node2kick);
+            } while (m_fixed[node2kick]);
 
-            if (reqs_[node2kick].valid())
-                reqs_[node2kick]->wait();
+            if (m_reqs[node2kick].valid())
+                m_reqs[node2kick]->wait();
 
+            node_type& node = *(m_nodes[node2kick]);
 
-            node_type& Node = *(nodes_[node2kick]);
-
-            if (dirty_[node2kick])
+            if (m_dirty[node2kick])
             {
-                Node.save();
+                node.save();
                 ++n_written;
             }
             else
                 ++n_clean_forced;
 
-
             //reqs_[node2kick] = request_ptr(); // reset request
 
-            assert(BID2node_.find(Node.my_bid()) != BID2node_.end());
-            BID2node_.erase(Node.my_bid());
-            bm->new_block(alloc_strategy_, new_bid);
+            assert(m_bid2node.find(node.my_bid()) != m_bid2node.end());
+            m_bid2node.erase(node.my_bid());
+            m_bm->new_block(m_alloc_strategy, new_bid);
 
-            BID2node_[new_bid] = node2kick;
+            m_bid2node[new_bid] = node2kick;
 
-            Node.init(new_bid);
+            node.init(new_bid);
 
-            dirty_[node2kick] = true;
+            m_dirty[node2kick] = true;
 
-            assert(size() == BID2node_.size() + free_nodes_.size());
+            assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-            STXXL_VERBOSE1("btree::node_cache get_new_node, need to kick node " << node2kick);
+            STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_new_node, need to kick node " << node2kick);
 
-            return &Node;
+            return &node;
         }
 
+        int_type free_node = m_free_nodes.back();
+        m_free_nodes.pop_back();
+        assert(m_fixed[free_node] == false);
 
-        int_type free_node = free_nodes_.back();
-        free_nodes_.pop_back();
-        assert(fixed_[free_node] == false);
-
-        bm->new_block(alloc_strategy_, new_bid);
-        BID2node_[new_bid] = free_node;
-        node_type& Node = *(nodes_[free_node]);
-        Node.init(new_bid);
+        m_bm->new_block(m_alloc_strategy, new_bid);
+        m_bid2node[new_bid] = free_node;
+        node_type& node = *(m_nodes[free_node]);
+        node.init(new_bid);
 
         // assert(!(reqs_[free_node].valid()));
 
-        pager_.hit(free_node);
+        m_pager.hit(free_node);
 
-        dirty_[free_node] = true;
+        m_dirty[free_node] = true;
 
-        assert(size() == BID2node_.size() + free_nodes_.size());
+        assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-        STXXL_VERBOSE1("btree::node_cache get_new_node, free node " << free_node << "available");
+        STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_new_node, free node " << free_node << "available");
 
-        return &Node;
+        return &node;
     }
 
-
     node_type * get_node(const bid_type& bid, bool fix = false)
     {
-        typename BID2node_type::const_iterator it = BID2node_.find(bid);
+        typename bid2node_type::const_iterator it = m_bid2node.find(bid);
         ++n_read;
 
-        if (it != BID2node_.end())
+        if (it != m_bid2node.end())
         {
             // the node is in cache
             const int_type nodeindex = it->second;
-            STXXL_VERBOSE1("btree::node_cache get_node, the node " << nodeindex << "is in cache , fix=" << fix);
-            fixed_[nodeindex] = fix;
-            pager_.hit(nodeindex);
-            dirty_[nodeindex] = true;
-
-            if (reqs_[nodeindex].valid() && !reqs_[nodeindex]->poll())
-                reqs_[nodeindex]->wait();
+            STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_node, the node " << nodeindex << "is in cache , fix=" << fix);
+            m_fixed[nodeindex] = fix;
+            m_pager.hit(nodeindex);
+            m_dirty[nodeindex] = true;
 
+            if (m_reqs[nodeindex].valid() && !m_reqs[nodeindex]->poll())
+                m_reqs[nodeindex]->wait();
 
             ++n_found;
-            return nodes_[nodeindex];
+            return m_nodes[nodeindex];
         }
 
         ++n_not_found;
 
         // the node is not in cache
-        if (free_nodes_.empty())
+        if (m_free_nodes.empty())
         {
             // need to kick a node
             int_type node2kick;
@@ -300,7 +294,7 @@ public:
             do
             {
                 ++i;
-                node2kick = pager_.kick();
+                node2kick = m_pager.kick();
                 if (i == max_tries)
                 {
                     STXXL_ERRMSG(
@@ -308,86 +302,83 @@ public:
                     STXXL_ERRMSG("Returning NULL node.");
                     return NULL;
                 }
-                pager_.hit(node2kick);
-            } while (fixed_[node2kick]);
+                m_pager.hit(node2kick);
+            } while (m_fixed[node2kick]);
 
-            if (reqs_[node2kick].valid())
-                reqs_[node2kick]->wait();
+            if (m_reqs[node2kick].valid())
+                m_reqs[node2kick]->wait();
 
+            node_type& node = *(m_nodes[node2kick]);
 
-            node_type& Node = *(nodes_[node2kick]);
-
-            if (dirty_[node2kick])
+            if (m_dirty[node2kick])
             {
-                Node.save();
+                node.save();
                 ++n_written;
             }
             else
                 ++n_clean_forced;
 
+            m_bid2node.erase(node.my_bid());
 
-            BID2node_.erase(Node.my_bid());
-
-            reqs_[node2kick] = Node.load(bid);
-            BID2node_[bid] = node2kick;
+            m_reqs[node2kick] = node.load(bid);
+            m_bid2node[bid] = node2kick;
 
-            fixed_[node2kick] = fix;
+            m_fixed[node2kick] = fix;
 
-            dirty_[node2kick] = true;
+            m_dirty[node2kick] = true;
 
-            assert(size() == BID2node_.size() + free_nodes_.size());
+            assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-            STXXL_VERBOSE1("btree::node_cache get_node, need to kick node" << node2kick << " fix=" << fix);
+            STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_node, need to kick node" << node2kick << " fix=" << fix);
 
-            return &Node;
+            return &node;
         }
 
-        int_type free_node = free_nodes_.back();
-        free_nodes_.pop_back();
-        assert(fixed_[free_node] == false);
+        int_type free_node = m_free_nodes.back();
+        m_free_nodes.pop_back();
+        assert(m_fixed[free_node] == false);
 
-        node_type& Node = *(nodes_[free_node]);
-        reqs_[free_node] = Node.load(bid);
-        BID2node_[bid] = free_node;
+        node_type& node = *(m_nodes[free_node]);
+        m_reqs[free_node] = node.load(bid);
+        m_bid2node[bid] = free_node;
 
-        pager_.hit(free_node);
+        m_pager.hit(free_node);
 
-        fixed_[free_node] = fix;
+        m_fixed[free_node] = fix;
 
-        dirty_[free_node] = true;
+        m_dirty[free_node] = true;
 
-        assert(size() == BID2node_.size() + free_nodes_.size());
+        assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-        STXXL_VERBOSE1("btree::node_cache get_node, free node " << free_node << "available, fix=" << fix);
+        STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_node, free node " << free_node << "available, fix=" << fix);
 
-        return &Node;
+        return &node;
     }
 
     node_type const * get_const_node(const bid_type& bid, bool fix = false)
     {
-        typename BID2node_type::const_iterator it = BID2node_.find(bid);
+        typename bid2node_type::const_iterator it = m_bid2node.find(bid);
         ++n_read;
 
-        if (it != BID2node_.end())
+        if (it != m_bid2node.end())
         {
             // the node is in cache
             const int_type nodeindex = it->second;
-            STXXL_VERBOSE1("btree::node_cache get_node, the node " << nodeindex << "is in cache , fix=" << fix);
-            fixed_[nodeindex] = fix;
-            pager_.hit(nodeindex);
-
-            if (reqs_[nodeindex].valid() && !reqs_[nodeindex]->poll())
-                reqs_[nodeindex]->wait();
+            STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_node, the node " << nodeindex << "is in cache , fix=" << fix);
+            m_fixed[nodeindex] = fix;
+            m_pager.hit(nodeindex);
 
+            if (m_reqs[nodeindex].valid() && !m_reqs[nodeindex]->poll())
+                m_reqs[nodeindex]->wait();
 
             ++n_found;
-            return nodes_[nodeindex];
+            return m_nodes[nodeindex];
         }
 
         ++n_not_found;
 
         // the node is not in cache
-        if (free_nodes_.empty())
+        if (m_free_nodes.empty())
         {
             // need to kick a node
             int_type node2kick;
@@ -396,7 +387,7 @@ public:
             do
             {
                 ++i;
-                node2kick = pager_.kick();
+                node2kick = m_pager.kick();
                 if (i == max_tries)
                 {
                     STXXL_ERRMSG(
@@ -404,96 +395,92 @@ public:
                     STXXL_ERRMSG("Returning NULL node.");
                     return NULL;
                 }
-                pager_.hit(node2kick);
-            } while (fixed_[node2kick]);
-
-            if (reqs_[node2kick].valid())
-                reqs_[node2kick]->wait();
+                m_pager.hit(node2kick);
+            } while (m_fixed[node2kick]);
 
+            if (m_reqs[node2kick].valid())
+                m_reqs[node2kick]->wait();
 
-            node_type& Node = *(nodes_[node2kick]);
-            if (dirty_[node2kick])
+            node_type& node = *(m_nodes[node2kick]);
+            if (m_dirty[node2kick])
             {
-                Node.save();
+                node.save();
                 ++n_written;
             }
             else
                 ++n_clean_forced;
 
+            m_bid2node.erase(node.my_bid());
 
-            BID2node_.erase(Node.my_bid());
-
-            reqs_[node2kick] = Node.load(bid);
-            BID2node_[bid] = node2kick;
+            m_reqs[node2kick] = node.load(bid);
+            m_bid2node[bid] = node2kick;
 
-            fixed_[node2kick] = fix;
+            m_fixed[node2kick] = fix;
 
-            dirty_[node2kick] = false;
+            m_dirty[node2kick] = false;
 
-            assert(size() == BID2node_.size() + free_nodes_.size());
+            assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-            STXXL_VERBOSE1("btree::node_cache get_node, need to kick node" << node2kick << " fix=" << fix);
+            STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_node, need to kick node" << node2kick << " fix=" << fix);
 
-            return &Node;
+            return &node;
         }
 
-        int_type free_node = free_nodes_.back();
-        free_nodes_.pop_back();
-        assert(fixed_[free_node] == false);
+        int_type free_node = m_free_nodes.back();
+        m_free_nodes.pop_back();
+        assert(m_fixed[free_node] == false);
 
-        node_type& Node = *(nodes_[free_node]);
-        reqs_[free_node] = Node.load(bid);
-        BID2node_[bid] = free_node;
+        node_type& node = *(m_nodes[free_node]);
+        m_reqs[free_node] = node.load(bid);
+        m_bid2node[bid] = free_node;
 
-        pager_.hit(free_node);
+        m_pager.hit(free_node);
 
-        fixed_[free_node] = fix;
+        m_fixed[free_node] = fix;
 
-        dirty_[free_node] = false;
+        m_dirty[free_node] = false;
 
-        assert(size() == BID2node_.size() + free_nodes_.size());
+        assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-        STXXL_VERBOSE1("btree::node_cache get_node, free node " << free_node << "available, fix=" << fix);
+        STXXL_BTREE_CACHE_VERBOSE("btree::node_cache get_node, free node " << free_node << "available, fix=" << fix);
 
-        return &Node;
+        return &node;
     }
 
     void delete_node(const bid_type& bid)
     {
-        typename BID2node_type::const_iterator it = BID2node_.find(bid);
+        typename bid2node_type::const_iterator it = m_bid2node.find(bid);
         try
         {
-            if (it != BID2node_.end())
+            if (it != m_bid2node.end())
             {
                 // the node is in the cache
                 const int_type nodeindex = it->second;
-                STXXL_VERBOSE1("btree::node_cache delete_node " << nodeindex << " from cache.");
-                if (reqs_[nodeindex].valid())
-                    reqs_[nodeindex]->wait();
+                STXXL_BTREE_CACHE_VERBOSE("btree::node_cache delete_node " << nodeindex << " from cache.");
+                if (m_reqs[nodeindex].valid())
+                    m_reqs[nodeindex]->wait();
 
                 //reqs_[nodeindex] = request_ptr(); // reset request
-                free_nodes_.push_back(nodeindex);
-                BID2node_.erase(bid);
-                fixed_[nodeindex] = false;
+                m_free_nodes.push_back(nodeindex);
+                m_bid2node.erase(bid);
+                m_fixed[nodeindex] = false;
             }
             ++n_deleted;
         } catch (const io_error& ex)
         {
-            bm->delete_block(bid);
+            m_bm->delete_block(bid);
             throw io_error(ex.what());
         }
-        bm->delete_block(bid);
+        m_bm->delete_block(bid);
     }
 
-
     void prefetch_node(const bid_type& bid)
     {
-        if (BID2node_.find(bid) != BID2node_.end())
+        if (m_bid2node.find(bid) != m_bid2node.end())
             return;
 
-
         // the node is not in cache
-        if (free_nodes_.empty())
+        if (m_free_nodes.empty())
         {
             // need to kick a node
             int_type node2kick;
@@ -502,7 +489,7 @@ public:
             do
             {
                 ++i;
-                node2kick = pager_.kick();
+                node2kick = m_pager.kick();
                 if (i == max_tries)
                 {
                     STXXL_ERRMSG(
@@ -510,80 +497,78 @@ public:
                     STXXL_ERRMSG("Returning NULL node.");
                     return;
                 }
-                pager_.hit(node2kick);
-            } while (fixed_[node2kick]);
+                m_pager.hit(node2kick);
+            } while (m_fixed[node2kick]);
 
-            if (reqs_[node2kick].valid())
-                reqs_[node2kick]->wait();
+            if (m_reqs[node2kick].valid())
+                m_reqs[node2kick]->wait();
 
+            node_type& node = *(m_nodes[node2kick]);
 
-            node_type& Node = *(nodes_[node2kick]);
-
-            if (dirty_[node2kick])
+            if (m_dirty[node2kick])
             {
-                Node.save();
+                node.save();
                 ++n_written;
             }
             else
                 ++n_clean_forced;
 
+            m_bid2node.erase(node.my_bid());
 
-            BID2node_.erase(Node.my_bid());
-
-            reqs_[node2kick] = Node.prefetch(bid);
-            BID2node_[bid] = node2kick;
+            m_reqs[node2kick] = node.prefetch(bid);
+            m_bid2node[bid] = node2kick;
 
-            fixed_[node2kick] = false;
+            m_fixed[node2kick] = false;
 
-            dirty_[node2kick] = false;
+            m_dirty[node2kick] = false;
 
-            assert(size() == BID2node_.size() + free_nodes_.size());
+            assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-            STXXL_VERBOSE1("btree::node_cache prefetch_node, need to kick node" << node2kick << " ");
+            STXXL_BTREE_CACHE_VERBOSE("btree::node_cache prefetch_node, need to kick node" << node2kick << " ");
 
             return;
         }
 
-        int_type free_node = free_nodes_.back();
-        free_nodes_.pop_back();
-        assert(fixed_[free_node] == false);
+        int_type free_node = m_free_nodes.back();
+        m_free_nodes.pop_back();
+        assert(m_fixed[free_node] == false);
 
-        node_type& Node = *(nodes_[free_node]);
-        reqs_[free_node] = Node.prefetch(bid);
-        BID2node_[bid] = free_node;
+        node_type& node = *(m_nodes[free_node]);
+        m_reqs[free_node] = node.prefetch(bid);
+        m_bid2node[bid] = free_node;
 
-        pager_.hit(free_node);
+        m_pager.hit(free_node);
 
-        fixed_[free_node] = false;
+        m_fixed[free_node] = false;
 
-        dirty_[free_node] = false;
+        m_dirty[free_node] = false;
 
-        assert(size() == BID2node_.size() + free_nodes_.size());
+        assert(size() == m_bid2node.size() + m_free_nodes.size());
 
-        STXXL_VERBOSE1("btree::node_cache prefetch_node, free node " << free_node << "available");
+        STXXL_BTREE_CACHE_VERBOSE("btree::node_cache prefetch_node, free node " << free_node << "available");
 
         return;
     }
 
     void unfix_node(const bid_type& bid)
     {
-        assert(BID2node_.find(bid) != BID2node_.end());
-        fixed_[BID2node_[bid]] = false;
-        STXXL_VERBOSE1("btree::node_cache unfix_node,  node " << BID2node_[bid]);
+        assert(m_bid2node.find(bid) != m_bid2node.end());
+        m_fixed[m_bid2node[bid]] = false;
+        STXXL_BTREE_CACHE_VERBOSE("btree::node_cache unfix_node,  node " << m_bid2node[bid]);
     }
 
     void swap(node_cache& obj)
     {
-        std::swap(comp_, obj.comp_);
-        std::swap(nodes_, obj.nodes_);
-        std::swap(reqs_, obj.reqs_);
-        change_btree_pointers(btree_);
-        obj.change_btree_pointers(obj.btree_);
-        std::swap(fixed_, obj.fixed_);
-        std::swap(free_nodes_, obj.free_nodes_);
-        std::swap(BID2node_, obj.BID2node_);
-        std::swap(pager_, obj.pager_);
-        std::swap(alloc_strategy_, obj.alloc_strategy_);
+        std::swap(m_cmp, obj.m_cmp);
+        std::swap(m_nodes, obj.m_nodes);
+        std::swap(m_reqs, obj.m_reqs);
+        change_btree_pointers(m_btree);
+        obj.change_btree_pointers(obj.m_btree);
+        std::swap(m_fixed, obj.m_fixed);
+        std::swap(m_free_nodes, obj.m_free_nodes);
+        std::swap(m_bid2node, obj.m_bid2node);
+        std::swap(m_pager, obj.m_pager);
+        std::swap(m_alloc_strategy, obj.m_alloc_strategy);
         std::swap(n_found, obj.n_found);
         std::swap(n_not_found, obj.n_found);
         std::swap(n_created, obj.n_created);
@@ -598,7 +583,6 @@ public:
         if (n_read)
             o << "Found blocks                      : " << n_found << " (" <<
                 100. * double(n_found) / double(n_read) << "%)" << std::endl;
-
         else
             o << "Found blocks                      : " << n_found << " (" <<
                 100 << "%)" << std::endl;
@@ -626,7 +610,6 @@ public:
 
 STXXL_END_NAMESPACE
 
-
 namespace std {
 
 template <class NodeType, class BTreeType>
diff --git a/include/stxxl/bits/containers/btree/root_node.h b/include/stxxl/bits/containers/btree/root_node.h
index a4410dc..a6dfca0 100644
--- a/include/stxxl/bits/containers/btree/root_node.h
+++ b/include/stxxl/bits/containers/btree/root_node.h
@@ -15,7 +15,6 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace btree {
diff --git a/include/stxxl/bits/containers/deque.h b/include/stxxl/bits/containers/deque.h
index f4db8f0..93dc654 100644
--- a/include/stxxl/bits/containers/deque.h
+++ b/include/stxxl/bits/containers/deque.h
@@ -18,7 +18,6 @@
 #include <limits>
 #include <stxxl/vector>
 
-
 STXXL_BEGIN_NAMESPACE
 
 template <class ValueType, class VectorType>
@@ -257,8 +256,8 @@ protected:
 public:
     const_deque_iterator() : m_deque(NULL), m_offset(0) { }
 
-    const_deque_iterator(const deque_iterator<deque_type>& it) :
-        m_deque(it.m_deque), m_offset(it.m_offset)
+    const_deque_iterator(const deque_iterator<deque_type>& it)
+        : m_deque(it.m_deque), m_offset(it.m_offset)
     { }
 
     difference_type operator - (const self_type& a) const
diff --git a/include/stxxl/bits/containers/hash_map/block_cache.h b/include/stxxl/bits/containers/hash_map/block_cache.h
new file mode 100644
index 0000000..7de1a8c
--- /dev/null
+++ b/include/stxxl/bits/containers/hash_map/block_cache.h
@@ -0,0 +1,613 @@
+/***************************************************************************
+ *  include/stxxl/bits/containers/hash_map/block_cache.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_CONTAINERS_HASH_MAP_BLOCK_CACHE_HEADER
+#define STXXL_CONTAINERS_HASH_MAP_BLOCK_CACHE_HEADER
+
+#ifdef STXXL_BOOST_CONFIG
+ #include <boost/config.hpp>
+#endif
+
+#include <stxxl/bits/noncopyable.h>
+#include <stxxl/bits/compat/hash_map.h>
+#include <stxxl/bits/mng/block_manager.h>
+#include <stxxl/bits/containers/pager.h>
+
+#include <vector>
+#include <list>
+
+STXXL_BEGIN_NAMESPACE
+
+namespace hash_map {
+
+//! Used inside block_cache for buffering write requests of cached blocks.
+template <class BlockType>
+class block_cache_write_buffer : private noncopyable
+{
+public:
+    typedef BlockType block_type;
+    typedef typename block_type::bid_type bid_type;
+
+protected:
+    std::vector<block_type*> blocks_;        // one block pointer per buffer slot; deleted in the destructor
+    std::vector<request_ptr> reqs_;          // most recent write request issued for each slot
+    std::vector<unsigned_type> free_blocks_; // slot indices that can be handed out by write()
+    std::list<unsigned_type> busy_blocks_; // slot indices with an issued write, oldest first. TODO make that a circular-buffer
+
+public:
+    block_cache_write_buffer(unsigned_type size) // size = number of buffer slots (blocks) to allocate
+    {
+        blocks_.reserve(size);
+        free_blocks_.reserve(size);
+        reqs_.resize(size);
+
+        for (unsigned_type i = 0; i < size; i++) {
+            blocks_.push_back(new block_type());
+            free_blocks_.push_back(i); // every slot starts out free
+        }
+    }
+
+    //! Starts writing the given block to bid and keeps write_block in the buffer;
+    //! callers have to exchange the passed block with the returned (spare) one!
+    block_type * write(block_type* write_block, const bid_type& bid)
+    {
+        if (free_blocks_.empty()) { // no slot available: reclaim the oldest busy one
+            unsigned_type i_buffer = busy_blocks_.front();
+            busy_blocks_.pop_front();
+
+            if (reqs_[i_buffer].valid())
+                reqs_[i_buffer]->wait(); // its write must finish before the block is reused
+
+            free_blocks_.push_back(i_buffer);
+        }
+
+        unsigned_type i_buffer = free_blocks_.back();
+        free_blocks_.pop_back();
+        block_type* buffer = blocks_[i_buffer]; // spare block that is returned to the caller
+
+        blocks_[i_buffer] = write_block; // the slot now holds the block being written
+        reqs_[i_buffer] = blocks_[i_buffer]->write(bid);
+        busy_blocks_.push_back(i_buffer);
+
+        return buffer;
+    }
+
+    void flush() // waits for every issued write, then marks all slots free again
+    {
+        while (!busy_blocks_.empty()) {
+            unsigned_type i_buffer = busy_blocks_.front();
+            busy_blocks_.pop_front();
+            if (reqs_[i_buffer].valid())
+                reqs_[i_buffer]->wait();
+        }
+        busy_blocks_.clear();
+        free_blocks_.clear(); // rebuild the free list from scratch: 0 .. blocks_.size()-1
+        for (unsigned_type i = 0; i < blocks_.size(); i++)
+            free_blocks_.push_back(i);
+    }
+
+    void swap(block_cache_write_buffer& obj) // exchanges the complete state of both buffers
+    {
+        std::swap(blocks_, obj.blocks_);
+        std::swap(reqs_, obj.reqs_);
+        std::swap(free_blocks_, obj.free_blocks_);
+        std::swap(busy_blocks_, obj.busy_blocks_);
+    }
+
+    ~block_cache_write_buffer()
+    {
+        flush(); // wait for issued writes before the blocks they refer to are deleted
+        for (unsigned_type i = 0; i < blocks_.size(); i++)
+            delete blocks_[i];
+    }
+};
+
+//! Cache of blocks contained in an external memory hash map. Uses the
+//! stxxl::lru_pager as eviction algorithm.
+template <class BlockType>
+class block_cache : private noncopyable
+{
+public:
+    typedef BlockType block_type;
+    typedef typename block_type::bid_type bid_type;
+    typedef typename block_type::value_type subblock_type;
+    typedef typename subblock_type::bid_type subblock_bid_type;
+
+protected:
+    struct bid_eq
+    {
+        bool operator () (const bid_type& a, const bid_type& b) const
+        {
+            return (a.storage == b.storage && a.offset == b.offset);
+        }
+    };
+
+    struct bid_hash
+    {
+        size_t operator () (const bid_type& bid) const
+        {
+            return longhash1(bid.offset + reinterpret_cast<uint64>(bid.storage));
+        }
+#ifdef STXXL_MSVC
+        bool operator () (const bid_type& a, const bid_type& b) const
+        {
+            return (a.storage < b.storage) ||
+                   (a.storage == b.storage && a.offset < b.offset);
+        }
+        enum
+        {                                  // parameters for hash table
+            bucket_size = 4,               // 0 < bucket_size
+            min_buckets = 8                // min_buckets = 2 ^^ N, 0 < N
+        };
+#endif
+    };
+
+    typedef stxxl::lru_pager<> pager_type;
+    typedef block_cache_write_buffer<block_type> write_buffer_type;
+
+    typedef typename compat_hash_map<bid_type, unsigned_type,
+                                     bid_hash>::result bid_map_type;
+
+    enum { valid_all = block_type::size };
+
+    write_buffer_type write_buffer_;
+
+    //! cached blocks
+    std::vector<block_type*> blocks_;
+    //! bids of cached blocks
+    std::vector<bid_type> bids_;
+    std::vector<unsigned_type> retain_count_;
+
+    //! true iff block has been altered while in cache
+    std::vector<unsigned char> dirty_;
+
+    //! valid_all or the actually loaded subblock's index
+    std::vector<unsigned_type> valid_subblock_;
+
+    //! free blocks as indices to blocks_-vector
+    std::vector<unsigned_type> free_blocks_;
+    std::vector<request_ptr> reqs_;
+
+    bid_map_type bid_map_;
+    pager_type pager_;
+
+    /* statistics */
+    int64 n_found;
+    int64 n_not_found;
+    int64 n_read;
+    int64 n_written;
+    int64 n_clean_forced;
+    int64 n_wrong_subblock;
+
+public:
+    //! Construct a new block-cache.
+    //! \param cache_size cache-size in number of blocks
+    block_cache(unsigned_type cache_size)
+        : write_buffer_(config::get_instance()->disks_number() * 2),
+          blocks_(cache_size),
+          bids_(cache_size),
+          retain_count_(cache_size),
+          dirty_(cache_size, false),
+          valid_subblock_(cache_size),
+          free_blocks_(cache_size),
+          reqs_(cache_size),
+          pager_(cache_size),
+          n_found(0),
+          n_not_found(0),
+          n_read(0),
+          n_written(0),
+          n_clean_forced(0),
+          n_wrong_subblock(0)
+    {
+        for (unsigned_type i = 0; i < cache_size; i++)
+        {
+            blocks_[i] = new block_type();
+            free_blocks_[i] = i;
+        }
+    }
+
+    //! Return cache-size
+    unsigned_type size() const
+    {
+        return blocks_.size();
+    }
+
+    ~block_cache()
+    {
+        STXXL_VERBOSE1("hash_map::block_cache destructor addr=" << this);
+        // write back every dirty cached block, then free all block slots
+        for (typename bid_map_type::const_iterator i = bid_map_.begin();
+             i != bid_map_.end(); ++i)
+        {
+            const unsigned_type i_block = (*i).second;
+            // a read into this slot may still be pending; let it finish first
+            if (reqs_[i_block].valid())
+                reqs_[i_block]->wait();
+            // the slot keeps whatever block pointer write() hands back
+            if (dirty_[i_block]) {
+                blocks_[i_block] =
+                    write_buffer_.write(blocks_[i_block], bids_[i_block]);
+            }
+        }
+        write_buffer_.flush();
+        // each slot holds one block (allocated with new in the constructor)
+        for (unsigned_type i = 0; i < size(); ++i)
+            delete blocks_[i];
+    }
+
+protected:
+    //! Force a block from the cache; write back to disk if dirty.
+    //! \throws std::runtime_error if all cached blocks are retained
+    void kick_block()
+    {
+        unsigned_type i_block2kick;
+        unsigned_type max_tries = size() + 1;
+        unsigned_type i = 0;
+        do
+        {
+            ++i;
+            i_block2kick = pager_.kick();
+            if (i == max_tries)
+            {
+                throw std::runtime_error(
+                          "The block cache is too small, "
+                          "no block can be kicked out (all blocks are retained)!"
+                          );
+            }
+            pager_.hit(i_block2kick);
+        } while (retain_count_[i_block2kick] > 0);
+        // a whole-block read may still be pending for the chosen slot
+        if (valid_subblock_[i_block2kick] == valid_all &&
+            reqs_[i_block2kick].valid())
+        {
+            reqs_[i_block2kick]->wait();
+        }
+
+        if (dirty_[i_block2kick])
+        {
+            blocks_[i_block2kick] =
+                write_buffer_.write(blocks_[i_block2kick], bids_[i_block2kick]);
+            ++n_written;
+        }
+        else
+            ++n_clean_forced;
+
+        bid_map_.erase(bids_[i_block2kick]);
+        free_blocks_.push_back(i_block2kick);
+    }
+
+public:
+    //! Pin a cached block: as long as its retain-count is positive,
+    //! kick_block() will not evict it. Every successful call has to be
+    //! paired with a release_block() call.
+    //!
+    //! \param bid identifier of the block to pin
+    //! \return false if the block is not cached (nothing changes), else true
+    bool retain_block(const bid_type& bid)
+    {
+        const typename bid_map_type::const_iterator pos = bid_map_.find(bid);
+        const bool is_cached = (pos != bid_map_.end());
+
+        if (is_cached)
+            ++retain_count_[(*pos).second];
+
+        return is_cached;
+    }
+
+    //! Undo one retain_block(): decrement the block's retain-count. Once the
+    //! count is back at 0, the block may be kicked again.
+    //!
+    //! \param bid identifier of the block to release
+    //! \return true if the count was decremented; false if the block is not
+    //!         cached or its retain-count is already 0
+    bool release_block(const bid_type& bid)
+    {
+        const typename bid_map_type::const_iterator pos = bid_map_.find(bid);
+        if (pos != bid_map_.end())
+        {
+            unsigned_type& count = retain_count_[(*pos).second];
+            if (count != 0) {
+                --count;
+                return true;
+            }
+        }
+        return false;
+    }
+
+    //! Set given block's dirty-flag. Note: If the given block was only
+    //! partially loaded, it will be completely reloaded.
+    //!
+    //! \param bid identifier of the block that is about to be altered
+    //! \return true if block cached, false otherwise
+    bool make_dirty(const bid_type& bid)
+    {
+        typename bid_map_type::const_iterator it = bid_map_.find(bid);
+        if (it == bid_map_.end())
+            return false;
+
+        unsigned_type i_block = (*it).second;
+        // only complete blocks can be marked as dirty
+        if (valid_subblock_[i_block] != valid_all)
+        {
+            reqs_[i_block] = blocks_[i_block]->read(bid);
+            valid_subblock_[i_block] = valid_all;
+        }
+        // wait for a pending read of this slot before flagging it
+        if (reqs_[i_block].valid()) {
+            if (reqs_[i_block]->poll() == false)
+                reqs_[i_block]->wait();
+        }
+
+        dirty_[i_block] = true;
+        return true;
+    }
+
+    //! Retrieve a subblock from the cache. If not yet cached, only the
+    //! subblock will be loaded.
+    //!
+    //! \param bid block, to which the requested subblock belongs
+    //! \param i_subblock index of requested subblock
+    //! \return pointer to the requested subblock inside the cached block
+    subblock_type * get_subblock(const bid_type& bid, unsigned_type i_subblock)
+    {
+        block_type* block;
+        unsigned_type i_block;
+        n_read++; // counts every request, hit or miss
+
+        // block (partly) cached?
+        typename bid_map_type::const_iterator it = bid_map_.find(bid);
+        if (it != bid_map_.end())
+        {
+            i_block = (*it).second;
+            block = blocks_[i_block];
+
+            // complete block or wanted subblock is in the cache
+            if (valid_subblock_[i_block] == valid_all ||
+                valid_subblock_[i_block] == i_subblock)
+            {
+                ++n_found;
+
+                if (valid_subblock_[i_block] == valid_all &&
+                    reqs_[i_block].valid())
+                {
+                    // request not yet completed?
+                    if (reqs_[i_block]->poll() == false)
+                        reqs_[i_block]->wait();
+                }
+                // note: a cache hit returns here, before pager_.hit() below
+                return &((*block)[i_subblock]);
+            }
+
+            // wrong subblock in cache
+            else
+            {
+                ++n_not_found;
+                ++n_wrong_subblock;
+                // actually loading the subblock will be done below
+
+                // note: if a client still holds a reference to the "old"
+                // subblock, it will find its data to be still valid.
+            }
+        }
+        // block not cached
+        else
+        {
+            n_not_found++;
+
+            if (free_blocks_.empty())
+                kick_block();
+
+            i_block = free_blocks_.back(), free_blocks_.pop_back();
+            block = blocks_[i_block];
+
+            bid_map_[bid] = i_block;
+            bids_[i_block] = bid;
+            dirty_[i_block] = false;
+            retain_count_[i_block] = 0;
+        }
+
+        // now actually load the wanted subblock and store it within *block
+        subblock_bid_type subblock_bid(
+            bid.storage, bid.offset + i_subblock * subblock_type::raw_size
+            );
+        request_ptr req = ((*block)[i_subblock]).read(subblock_bid);
+        req->wait();
+        // remember which single subblock of this slot is valid now
+        valid_subblock_[i_block] = i_subblock;
+        pager_.hit(i_block);
+
+        return &((*block)[i_subblock]);
+    }
+
+    //! Load a block in advance; the read is only started, not waited for.
+    //! \param bid Identifier of the block to load
+    void prefetch_block(const bid_type& bid)
+    {
+        unsigned_type i_block;
+
+        // cached
+        typename bid_map_type::const_iterator it = bid_map_.find(bid);
+        if (it != bid_map_.end())
+        {
+            i_block = (*it).second;
+
+            // complete block cached; we can finish here
+            if (valid_subblock_[i_block] == valid_all) {
+                pager_.hit(i_block);
+                return;
+            }
+
+            // only a single subblock is cached; we have to load the
+            // complete block (see below)
+        }
+        // not even a subblock cached
+        else {
+            if (free_blocks_.empty())
+                kick_block();
+
+            i_block = free_blocks_.back(), free_blocks_.pop_back();
+
+            bid_map_[bid] = i_block;
+            bids_[i_block] = bid;
+            retain_count_[i_block] = 0;
+            dirty_[i_block] = false;
+        }
+        // (retain_count_ and dirty_ of an already cached block stay untouched)
+        // now actually load the block
+        reqs_[i_block] = blocks_[i_block]->read(bid);
+        valid_subblock_[i_block] = valid_all;
+        pager_.hit(i_block);
+    }
+
+    //! Hand every dirty cached block to the write buffer, clear its
+    //! dirty-flag, and finally flush the write buffer.
+    void flush()
+    {
+        typename bid_map_type::const_iterator entry = bid_map_.begin();
+        for ( ; entry != bid_map_.end(); ++entry)
+        {
+            const unsigned_type slot = (*entry).second;
+            if (!dirty_[slot])
+                continue;
+
+            // the slot continues with the block pointer write() returns
+            blocks_[slot] = write_buffer_.write(blocks_[slot], bids_[slot]);
+            dirty_[slot] = false;
+        }
+        write_buffer_.flush();
+    }
+
+    //! Drop all cached blocks without write-back; all slots become free
+    void clear()
+    {
+        free_blocks_.clear();
+        for (unsigned_type slot = 0; slot < size(); ++slot)
+        {
+            request_ptr& pending = reqs_[slot];
+            if (pending.valid()) {
+                pending->cancel();
+                pending->wait();
+            }
+            free_blocks_.push_back(slot);
+        }
+        bid_map_.clear();
+    }
+
+    //! Print statistics: number of hits/misses, blocks forced from cache or
+    //! written back. \param o output stream; the hit ratio is shown as 0% while n_read is 0
+    void print_statistics(std::ostream& o = std::cout) const
+    {
+        o << "Blocks found                      : " << n_found << " (" << (n_read != 0 ? 100. * double(n_found) / double(n_read) : 0.) << "%)" << std::endl;
+        o << "Blocks not found                  : " << n_not_found << std::endl;
+        o << "Blocks read                       : " << n_read << std::endl;
+        o << "Blocks written                    : " << n_written << std::endl;
+        o << "Clean blocks forced from the cache: " << n_clean_forced << std::endl;
+        o << "Wrong subblock cached             : " << n_wrong_subblock << std::endl;
+    }
+
+    //! Set every statistics counter of the cache back to zero
+    void reset_statistics()
+    {
+        // lookups and their outcome
+        n_read = 0;
+        n_found = n_not_found = n_wrong_subblock = 0;
+
+        // evictions: written back vs. dropped clean
+        n_written = n_clean_forced = 0;
+    }
+
+    //! Exchange contents of two caches (blocks, bookkeeping and statistics)
+    //! \param obj cache to swap contents with
+    void swap(block_cache& obj)
+    {
+        write_buffer_.swap(obj.write_buffer_);
+        std::swap(blocks_, obj.blocks_);
+        std::swap(bids_, obj.bids_);
+        std::swap(retain_count_, obj.retain_count_);
+
+        std::swap(dirty_, obj.dirty_);
+        std::swap(valid_subblock_, obj.valid_subblock_);
+
+        std::swap(free_blocks_, obj.free_blocks_);
+        std::swap(reqs_, obj.reqs_);
+
+        std::swap(bid_map_, obj.bid_map_);
+        std::swap(pager_, obj.pager_);
+        // statistics: every counter is exchanged with its own counterpart
+        std::swap(n_found, obj.n_found);
+        std::swap(n_not_found, obj.n_not_found);
+        std::swap(n_read, obj.n_read);
+        std::swap(n_written, obj.n_written);
+        std::swap(n_clean_forced, obj.n_clean_forced);
+        std::swap(n_wrong_subblock, obj.n_wrong_subblock);
+    }
+
+#if 0   // for debugging, requires data items to be ostream-able.
+
+    //! Show currently cached blocks; items are printed via .first and .second
+    void dump_cache(std::ostream& os) const
+    {
+        for (size_t i = 0; i < blocks_.size(); i++)
+        {
+            bid_type bid = bids_[i];
+            if (bid_map_.count(bid) == 0) {
+                os << "Block " << i << ": empty\n";
+                continue;
+            }
+            // the slot's bid is cached: print its bookkeeping, then its items
+            os << "Block " << i << ": bid=" << bids_[i]
+               << " dirty=" << dirty_[i]
+               << " retain_count=" << retain_count_[i]
+               << " valid_subblock=" << valid_subblock_[i] << "\n";
+
+            for (size_t k = 0; k < block_type::size; k++) {
+                os << "  Subbblock " << k << ": ";
+                if (valid_subblock_[i] != valid_all && valid_subblock_[i] != k)
+                {
+                    os << "not valid\n";
+                    continue;
+                }
+                for (size_t l = 0; l < block_type::value_type::size; l++) {
+                    os << "(" << (*blocks_[i])[k][l].first
+                       << ", " << (*blocks_[i])[k][l].second << ") ";
+                }
+                os << std::endl;
+            }
+        }
+    }
+#endif
+};
+
+} // namespace hash_map
+
+STXXL_END_NAMESPACE
+
+namespace std {
+// swap() overloads forwarding to the member swap() of the block-cache classes
+template <class BlockType>
+void swap(stxxl::hash_map::block_cache_write_buffer<BlockType>& a,
+          stxxl::hash_map::block_cache_write_buffer<BlockType>& b)
+{
+    a.swap(b);
+}
+// NOTE(review): these are new overloads in namespace std, not specializations
+template <class HashMap>
+void swap(stxxl::hash_map::block_cache<HashMap>& a,
+          stxxl::hash_map::block_cache<HashMap>& b)
+{
+    a.swap(b);
+}
+
+} // namespace std
+
+#endif // !STXXL_CONTAINERS_HASH_MAP_BLOCK_CACHE_HEADER
diff --git a/include/stxxl/bits/containers/hash_map/hash_map.h b/include/stxxl/bits/containers/hash_map/hash_map.h
new file mode 100644
index 0000000..245065e
--- /dev/null
+++ b/include/stxxl/bits/containers/hash_map/hash_map.h
@@ -0,0 +1,1609 @@
+/***************************************************************************
+ *  include/stxxl/bits/containers/hash_map/hash_map.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
+#define STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
+
+#include <functional>
+
+#include <stxxl/bits/noncopyable.h>
+#include <stxxl/bits/namespace.h>
+#include <stxxl/bits/mng/block_manager.h>
+#include <stxxl/bits/common/tuple.h>
+#include <stxxl/bits/stream/stream.h>
+#include <stxxl/bits/stream/sort_stream.h>
+
+#include <stxxl/bits/containers/hash_map/iterator.h>
+#include <stxxl/bits/containers/hash_map/iterator_map.h>
+#include <stxxl/bits/containers/hash_map/block_cache.h>
+#include <stxxl/bits/containers/hash_map/util.h>
+
+STXXL_BEGIN_NAMESPACE
+
+#define STXXL_VERBOSE_HASH_MAP(m) \
+    STXXL_VERBOSE1("hash_map[" << static_cast<const void*>(this) << "]::" << m)
+
+//! External memory hash-map
+namespace hash_map {
+
+/*!
+ * Main implementation of external memory hash map.
+ *
+ * \tparam KeyType the key type
+ * \tparam MappedType the mapped type associated with a key
+ * \tparam HashType a hash functional
+ * \tparam KeyCompareType a less comparison relation for KeyType
+ * \tparam SubBlockSize the raw size of a subblock (caching granularity)
+ * (default: 4096)
+ * \tparam SubBlocksPerBlock the number of subblocks per external block
+ * (default: 256 -> 1MB blocks)
+ * \tparam AllocatorType allocator for internal-memory buffer
+ */
+template <class KeyType,
+          class MappedType,
+          class HashType,
+          class KeyCompareType,
+          unsigned SubBlockSize = 4*1024,
+          unsigned SubBlocksPerBlock = 256,
+          class AllocatorType = std::allocator<std::pair<const KeyType, MappedType> >
+          >
+class hash_map : private noncopyable
+{
+protected:
+    //! this instantiation, including AllocatorType (injected class name)
+    typedef hash_map self_type;
+
+public:
+    //! type of the keys being used
+    typedef KeyType key_type;
+    //! type of the data to be stored
+    typedef MappedType mapped_type;
+    //! actually store (key-data)-pairs
+    typedef std::pair<KeyType, MappedType> value_type;
+    //! type for value-references
+    typedef value_type& reference;
+    //! type for constant value-references
+    typedef const value_type& const_reference;
+    //! pointer to a stored value
+    typedef value_type* pointer;
+    //! const pointer to a stored value
+    typedef value_type const* const_pointer;
+
+    typedef stxxl::external_size_type external_size_type;
+    typedef stxxl::internal_size_type internal_size_type;
+    typedef stxxl::int64 difference_type;
+
+    //! type of (mother) hash-function
+    typedef HashType hasher;
+    //! functor that imposes a ordering on keys (but see _lt())
+    typedef KeyCompareType key_compare;
+    //! allocator template type
+    typedef AllocatorType allocator_type;
+
+    typedef hash_map_iterator<self_type> iterator;
+    typedef hash_map_const_iterator<self_type> const_iterator;
+
+    //! subblock- and block-size in bytes
+    enum {
+        block_raw_size = SubBlocksPerBlock * SubBlockSize,
+        subblock_raw_size = SubBlockSize
+    };
+
+    //! Subblock-size as number of elements, block-size as number of subblocks
+    enum {
+        subblocks_per_block = SubBlocksPerBlock,
+        subblock_size = SubBlockSize / sizeof(value_type)
+    };
+
+    //! a subblock consists of subblock_size values
+    typedef typed_block<subblock_raw_size, value_type> subblock_type;
+    //! a block consists of block_size subblocks
+    typedef typed_block<block_raw_size, subblock_type> block_type;
+
+    //! block-identifier for subblocks
+    typedef typename subblock_type::bid_type subblock_bid_type;
+    //! block-identifier for blocks
+    typedef typename block_type::bid_type bid_type;
+    //! container for block-bids
+    typedef std::vector<bid_type> bid_container_type;
+    //! iterator for block-bids
+    typedef typename bid_container_type::iterator bid_iterator_type;
+
+    enum source_type { src_internal, src_external, src_unknown };
+
+    //! nodes for internal-memory buffer
+    typedef node<value_type> node_type;
+    //! buckets
+    typedef bucket<node_type> bucket_type;
+
+    typedef std::vector<bucket_type> buckets_container_type;
+
+    //! for tracking active iterators
+    typedef iterator_map<self_type> iterator_map_type;
+
+    typedef block_cache<block_type> block_cache_type;
+
+    typedef buffered_reader<block_cache_type, bid_iterator_type> reader_type;
+
+    typedef typename allocator_type::template rebind<node_type>::other node_allocator_type;
+
+protected:
+    //! user supplied mother hash-function
+    hasher hash_;
+    //! user supplied strict-weak-ordering for keys
+    key_compare cmp_;
+    //! array of buckets
+    buckets_container_type buckets_;
+    //! blocks-ids of allocated blocks
+    bid_container_type bids_;
+    //! current size of internal-memory buffer in number of entries
+    internal_size_type buffer_size_;
+    //! maximum size for internal-memory buffer in number of entries (nodes)
+    internal_size_type max_buffer_size_;
+    //! keeps track of all active iterators
+    iterator_map_type iterator_map_;
+    //! cache for blocks of external memory (mutable: used by const methods)
+    mutable block_cache_type block_cache_;
+    //! used to allocate new nodes for internal buffer
+    node_allocator_type node_allocator_;
+    //! true if the total number of values (num_total_) is only an estimate,
+    //! false if it is exact; see the *_oblivious methods
+    mutable bool oblivious_;
+    //! (estimated) number of values
+    mutable external_size_type num_total_;
+    //! desired load factor after rehashing
+    float opt_load_factor_;
+
+public:
+    /*! Construct a new, empty hash-map; all find-statistics start at zero.
+     * \param n initial number of buckets
+     * \param hf hash-function
+     * \param cmp comparator-object
+     * \param buffer_size size of internal-memory buffer in bytes
+     * \param a allocation strategy for internal-memory buffer
+     */
+    hash_map(internal_size_type n = 0,
+             const hasher& hf = hasher(),
+             const key_compare& cmp = key_compare(),
+             internal_size_type buffer_size = 128*1024*1024,
+             const allocator_type& a = allocator_type())
+        : hash_(hf),
+          cmp_(cmp),
+          buckets_(n),
+          bids_(0),
+          buffer_size_(0),
+          iterator_map_(this),
+          block_cache_(tuning::get_instance()->blockcache_size),
+          node_allocator_(a),
+          oblivious_(false),
+          num_total_(0),
+          opt_load_factor_(0.875)
+    {
+        max_buffer_size_ = buffer_size / sizeof(node_type);
+        n_subblocks_loaded = n_found_internal = n_found_external = n_not_found = 0;
+    }
+
+    /*! Construct a new hash-map and insert all values in the range [begin,end)
+     *
+     * \param begin beginning of the range
+     * \param end end of the range
+     * \param mem_to_sort internal memory that may be used for bulk-construction (not
+     * to be confused with the buffer-memory)
+     * \param n initial number of buckets
+     * \param hf hash-function
+     * \param cmp comparator-object
+     * \param buffer_size size of internal-memory buffer in bytes
+     * \param a allocation strategy for internal-memory buffer
+     */
+    template <class InputIterator>
+    hash_map(InputIterator begin, InputIterator end,
+             internal_size_type mem_to_sort = 256*1024*1024,
+             internal_size_type n = 0,
+             const hasher& hf = hasher(),
+             const key_compare& cmp = key_compare(),
+             internal_size_type buffer_size = 128*1024*1024,
+             const allocator_type& a = allocator_type())
+        : hash_(hf),
+          cmp_(cmp),
+          buckets_(n),                 // insert will determine a good size
+          bids_(0),
+          buffer_size_(0),
+          iterator_map_(this),
+          block_cache_(tuning::get_instance()->blockcache_size),
+          node_allocator_(a),
+          oblivious_(false),
+          num_total_(0),
+          opt_load_factor_(0.875)
+    {
+        max_buffer_size_ = buffer_size / sizeof(node_type);
+        n_subblocks_loaded = n_found_internal = n_found_external = n_not_found = 0;
+        insert(begin, end, mem_to_sort);
+    }
+
+    //! Destructor: delegates to clear(), which frees the buffer nodes and
+    //! the external blocks
+    ~hash_map()
+    { clear(); }
+
+public:
+    //! Copy of the (mother) hash-function stored in this hash-map
+    hasher hash_function() const
+    { return this->hash_; }
+
+    //! Copy of the key ordering stored in this hash-map
+    key_compare key_cmp() const
+    { return this->cmp_; }
+
+    //! Allocator, converted from the node allocator kept by this hash-map
+    allocator_type get_allocator() const
+    { return this->node_allocator_; }
+
+protected:
+    /*!
+     * After using *_oblivious methods only an estimate for the total number
+     * of elements can be given.  This method accesses external memory to
+     * calculate the exact number and clears the oblivious_ flag.
+     */
+    void _make_conscious()
+    { /* const */                           // TODO: make const again
+        if (!oblivious_)
+            return;
+
+        typedef HashedValuesStream<self_type, reader_type> values_stream_type;
+
+        // this will start prefetching automatically
+        reader_type reader(bids_.begin(), bids_.end(), block_cache_);
+        values_stream_type values(buckets_.begin(), buckets_.end(),
+                                  reader, bids_.begin(), *this);
+        // recount from scratch: one increment per element of the stream
+        num_total_ = 0;
+        while (!values.empty())
+        {
+            ++num_total_;
+            ++values;
+        }
+        oblivious_ = false;
+    }
+
+public:
+    //! Number of values currently stored. Note: If the correct number is
+    //! currently unknown (because *_oblivious methods were used), external
+    //! memory will be scanned (hence the const_cast: the recount mutates).
+    external_size_type size() const
+    {
+        if (oblivious_)
+            const_cast<hash_map*>(this)->_make_conscious();
+        return num_total_;
+    }
+
+    //! Upper bound on the number of values the hash-map can hold
+    external_size_type max_size() const
+    {
+        return (std::numeric_limits<external_size_type>::max)();
+    }
+
+    //! Check if container is empty, i.e. size() == 0 (may scan external memory).
+    bool empty() const
+    {
+        return size() == 0;
+    }
+
+    /*!
+     * Insert a new value if no value with the same key is already present;
+     * external memory must therefore be accessed
+     *
+     * \param value what to insert
+     * \return a pair whose second part is true iff the value was actually
+     * added (no value with the same key present); the first part is an
+     * iterator pointing to the newly inserted or already stored value
+     */
+    std::pair<iterator, bool> insert(const value_type& value)
+    {
+        if (buckets_.size() == 0)
+            _rebuild_buckets(128);
+        // (the hash-map may have been constructed without any buckets)
+        internal_size_type i_bucket = _bkt_num(value.first);
+        bucket_type& bucket = buckets_[i_bucket];
+        node_type* node = _find_key_internal(bucket, value.first);
+
+        // found value in internal memory
+        if (node && _eq(node->value_.first, value.first))
+        {
+            bool old_deleted = node->deleted();
+            if (old_deleted)
+            {
+                node->set_deleted(false);
+                node->value_ = value;
+                ++num_total_;
+            }
+            return std::pair<iterator, bool>(
+                iterator(this, i_bucket, node,
+                         0, src_internal, false, value.first), old_deleted);
+        }
+
+        // search external memory ...
+        else
+        {
+            tuple<external_size_type, value_type> result
+                = _find_key_external(bucket, value.first);
+
+            external_size_type i_external = result.first;
+            value_type ext_value = result.second;
+
+            // ... if found, return iterator pointing to external position ...
+            if (i_external < bucket.n_external_ && _eq(ext_value.first, value.first))
+            {
+                return std::pair<iterator, bool>(
+                    iterator(this, i_bucket, node,
+                             i_external, src_external, true, value.first), false);
+            }
+            // ... otherwise create a new buffer-node to add the value
+            else
+            {
+                ++num_total_;
+                node_type* new_node =
+                    node
+                    ? node->set_next(_new_node(value, node->next(), false))
+                    : (bucket.list_ = _new_node(value, bucket.list_, false));
+
+                iterator it(this, i_bucket, new_node,
+                            0, src_internal, false, value.first);
+
+                ++buffer_size_;
+                if (buffer_size_ >= max_buffer_size_)
+                    _rebuild_buckets();                 // will fix it as well
+
+                return std::pair<iterator, bool>(it, true);
+            }
+        }
+    }
+
+    //! Insert a value without accessing external memory, so that another value
+    //! with the same key may be overwritten; buckets are created on first use
+    //! \param value what to insert
+    //! \return iterator pointing to the inserted value
+    iterator insert_oblivious(const value_type& value)
+    {
+        if (buckets_.size() == 0)
+            _rebuild_buckets(128);
+
+        internal_size_type i_bucket = _bkt_num(value.first);
+        bucket_type& bucket = buckets_[i_bucket];
+        node_type* node = _find_key_internal(bucket, value.first);
+
+        // found value in internal memory
+        if (node && _eq(node->value_.first, value.first))
+        {
+            if (node->deleted())
+                ++num_total_;
+            node->set_deleted(false);
+            node->value_ = value;
+            return iterator(this, i_bucket, node,
+                            0, src_internal, false, value.first);
+        }
+        // not found; ignore external memory and add a new node to the
+        // internal-memory buffer
+        else
+        {
+            oblivious_ = true;
+            ++num_total_;
+            node_type* new_node =
+                node
+                ? node->set_next(_new_node(value, node->next(), false))
+                : (bucket.list_ = _new_node(value, bucket.list_, false));
+
+            // there may be some iterators that reference the newly inserted
+            // value in external memory these need to be fixed (make them point
+            // to new_node)
+            iterator_map_.fix_iterators_2int(i_bucket, value.first, new_node);
+            iterator it(this, i_bucket, new_node,
+                        0, src_internal, false, value.first);
+
+            ++buffer_size_;
+            if (buffer_size_ >= max_buffer_size_)
+                _rebuild_buckets();
+            return it;
+        }
+    }
+
+    //! Erase value by iterator
+    //! \param it iterator pointing to the value to erase (not checked here)
+    void erase(const_iterator it)
+    {
+        --num_total_;
+        bucket_type& bucket = buckets_[it.i_bucket_];
+        // buffer node: flag it as deleted; external value: add a delete-node
+        if (it.source_ == src_internal)
+        {
+            it.node_->set_deleted(true);
+            iterator_map_.fix_iterators_2end(it.i_bucket_, it.key_);
+        }
+        else {
+            // find biggest value < iterator's value
+            node_type* node = _find_key_internal(bucket, it.key_);
+            assert(!node || !_eq(node->value_.first, it.key_));
+
+            // add delete-node to buffer
+            if (node)
+                node->set_next(_new_node(value_type(it.key_, mapped_type()), node->next(), true));
+            else
+                bucket.list_ = _new_node(value_type(it.key_, mapped_type()), bucket.list_, true);
+
+            iterator_map_.fix_iterators_2end(it.i_bucket_, it.key_);
+
+            ++buffer_size_;
+            if (buffer_size_ >= max_buffer_size_)
+                _rebuild_buckets();
+        }
+    }
+
+    //! Erase value by key; check external memory
+    //! \param key key of value to erase
+    //! \return number of values actually erased (0 or 1); 0 without buckets
+    external_size_type erase(const key_type& key)
+    {
+        if (buckets_.size() == 0)
+            return 0;
+
+        internal_size_type i_bucket = _bkt_num(key);
+        bucket_type& bucket = buckets_[i_bucket];
+        node_type* node = _find_key_internal(bucket, key);
+
+        // found in internal memory
+        if (node && _eq(node->value_.first, key))
+        {
+            if (!node->deleted())
+            {
+                node->set_deleted(true);
+                --num_total_;
+                iterator_map_.fix_iterators_2end(i_bucket, key);
+                return 1;
+            }
+            else
+                return 0;               // already deleted
+        }
+        // check external memory
+        else
+        {
+            tuple<external_size_type, value_type> result
+                = _find_key_external(bucket, key);
+
+            external_size_type i_external = result.first;
+            value_type ext_value = result.second;
+
+            // found in external memory; add delete-node
+            if (i_external < bucket.n_external_ && _eq(ext_value.first, key))
+            {
+                --num_total_;
+                if (node)
+                    node->set_next(_new_node(value_type(key, mapped_type()), node->next(), true));
+                else
+                    bucket.list_ = _new_node(value_type(key, mapped_type()), bucket.list_, true);
+                iterator_map_.fix_iterators_2end(i_bucket, key);
+                ++buffer_size_;
+                if (buffer_size_ >= max_buffer_size_)
+                    _rebuild_buckets();
+
+                return 1;
+            }
+            // no value with given key
+            else
+                return 0;
+        }
+    }
+
+    //! Erase value by key but without looking at external memory; a no-op
+    //! while the hash-map has no buckets. \param key key of value to erase
+    void erase_oblivious(const key_type& key)
+    {
+        if (buckets_.size() == 0)
+            return;
+
+        internal_size_type i_bucket = _bkt_num(key);
+        bucket_type& bucket = buckets_[i_bucket];
+        node_type* node = _find_key_internal(bucket, key);
+
+        // found value in internal-memory
+        if (node && _eq(node->value_.first, key))
+        {
+            if (!node->deleted())
+            {
+                --num_total_;
+                node->set_deleted(true);
+                iterator_map_.fix_iterators_2end(i_bucket, key);
+            }
+        }
+        // not found; ignore external memory and add delete-node
+        else
+        {
+            oblivious_ = true;
+            --num_total_;
+            if (node)
+                node->set_next(_new_node(value_type(key, mapped_type()), node->next(), true));
+            else
+                bucket.list_ = _new_node(value_type(key, mapped_type()), bucket.list_, true);
+            iterator_map_.fix_iterators_2end(i_bucket, key);
+            ++buffer_size_;
+            if (buffer_size_ >= max_buffer_size_)
+                _rebuild_buckets();
+        }
+    }
+
+    //! Reset hash-map: erase all values, invalidate all iterators; the number of buckets is kept
+    void clear()
+    {
+        STXXL_VERBOSE_HASH_MAP("clear()");
+        // dirty cached blocks are still written back before the cache is emptied
+        iterator_map_.fix_iterators_all2end();
+        block_cache_.flush();
+        block_cache_.clear();
+
+        // reset buckets and release buffer-memory
+        for (internal_size_type i_bucket = 0;
+             i_bucket < buckets_.size(); i_bucket++)
+        {
+            _erase_nodes(buckets_[i_bucket].list_, NULL);
+            buckets_[i_bucket] = bucket_type();
+        }
+        oblivious_ = false;
+        num_total_ = 0;
+        buffer_size_ = 0;
+
+        // free external memory
+        block_manager* bm = block_manager::get_instance();
+        bm->delete_blocks(bids_.begin(), bids_.end());
+        bids_.clear();
+    }
+
+    //! Exchange stored values with another hash-map. All state visible in
+    //! this class is exchanged member by member: buckets, block ids, element
+    //! counters, allocator, functors, buffer limits, iterator map, block
+    //! cache and the find-statistics counters.
+    //! \param obj hash-map to swap values with
+    void swap(self_type& obj)
+    {
+        std::swap(buckets_, obj.buckets_);
+        std::swap(bids_, obj.bids_);
+
+        std::swap(oblivious_, obj.oblivious_);
+        std::swap(num_total_, obj.num_total_);
+
+        std::swap(node_allocator_, obj.node_allocator_);
+
+        std::swap(hash_, obj.hash_);
+        std::swap(cmp_, obj.cmp_);
+
+        std::swap(buffer_size_, obj.buffer_size_);
+        std::swap(max_buffer_size_, obj.max_buffer_size_);
+
+        std::swap(opt_load_factor_, obj.opt_load_factor_);
+
+        std::swap(iterator_map_, obj.iterator_map_);
+
+        std::swap(block_cache_, obj.block_cache_);
+
+        // the find-statistics belong to the swapped state as well; without
+        // this print_statistics() would mix counters of both maps, because
+        // the iterator-map and block-cache statistics travel with their
+        // objects above
+        std::swap(n_subblocks_loaded, obj.n_subblocks_loaded);
+        std::swap(n_found_internal, obj.n_found_internal);
+        std::swap(n_found_external, obj.n_found_external);
+        std::swap(n_not_found, obj.n_not_found);
+    }
+
+protected:
+    // find statistics (declared mutable; they are updated from const
+    // member functions such as find() const and _load_subblock())
+    //! number of subblocks requested through _load_subblock()
+    mutable external_size_type n_subblocks_loaded;
+    //! number of find() calls whose key was present in the internal buffer
+    mutable external_size_type n_found_internal;
+    //! number of find() calls whose key was located in external memory
+    mutable external_size_type n_found_external;
+    //! number of find() calls that found the key neither internally nor externally
+    mutable external_size_type n_not_found;
+
+public:
+    //! Reset hash-map statistics: the block cache's own statistics and all
+    //! four find counters are set back to zero.
+    void reset_statistics()
+    {
+        block_cache_.reset_statistics();
+
+        n_not_found = 0;
+        n_found_internal = 0;
+        n_found_external = 0;
+        n_subblocks_loaded = 0;
+    }
+
+    //! Print short general statistics to output stream: the find counters
+    //! first, followed by the statistics of the iterator map and the block
+    //! cache.
+    //! \param o stream to write to (defaults to std::cout)
+    void print_statistics(std::ostream& o = std::cout) const
+    {
+        o << "Find-statistics:" << std::endl
+          << "  Found internal     : " << n_found_internal << std::endl
+          << "  Found external     : " << n_found_external << std::endl
+          << "  Not found          : " << n_not_found << std::endl
+          << "  Subblocks searched : " << n_subblocks_loaded << std::endl;
+
+        iterator_map_.print_statistics(o);
+        block_cache_.print_statistics(o);
+    }
+
+    //! Look up value by key. Non-const access.
+    //! A value found in external memory is copied into a new buffer node so
+    //! that a subsequent write through the returned iterator needs no further
+    //! disk access.
+    //! \param key key for value to look up
+    //! \return iterator to the value, or end() if the key is absent or its
+    //! buffered node is flagged as deleted
+    iterator find(const key_type& key)
+    {
+        // (*) make sure one more node fits into the buffer before searching,
+        // so that no rebuild is needed further down
+        if (buffer_size_ + 1 >= max_buffer_size_)
+            _rebuild_buckets();
+
+        const internal_size_type bucket_idx = _bkt_num(key);
+        bucket_type& bkt = buckets_[bucket_idx];
+        node_type* pred = _find_key_internal(bkt, key);
+
+        // key is present in the internal-memory buffer
+        if (pred && _eq(pred->value_.first, key))
+        {
+            n_found_internal++;
+            if (pred->deleted())
+                return this->_end<iterator>();
+            return iterator(this, bucket_idx, pred, 0, src_internal, false, key);
+        }
+
+        // otherwise search the external part of the bucket
+        tuple<external_size_type, value_type> hit = _find_key_external(bkt, key);
+        const external_size_type ext_pos = hit.first;
+        value_type ext_value = hit.second;
+
+        // not present in external memory either
+        if (!(ext_pos < bkt.n_external_ && _eq(ext_value.first, key)))
+        {
+            n_not_found++;
+            return this->_end<iterator>();
+        }
+
+        n_found_external++;
+
+        // we ultimately expect the user to dereference the returned iterator
+        // to change its value (non-const!). to avoid an additional disk
+        // access, a new buffer node overriding the external value is linked
+        // in behind the predecessor (or at the list head). thanks to the
+        // check in (*) the node is guaranteed to fit.
+        node_type* buffered;
+        if (pred)
+            buffered = pred->set_next(_new_node(ext_value, pred->next(), false));
+        else
+            buffered = (bkt.list_ = _new_node(ext_value, bkt.list_, false));
+
+        ++buffer_size_;
+
+        iterator_map_.fix_iterators_2int(bucket_idx, ext_value.first, buffered);
+
+        return iterator(this, bucket_idx, buffered, ext_pos + 1, src_internal, true, key);
+    }
+
+    //! Look up value by key. Const access.
+    //! Unlike the non-const overload no buffer node is created; a value found
+    //! in external memory is referenced in place.
+    //! \param key key for value to look up
+    //! \return const_iterator to the value, or end() if the key is absent or
+    //! its buffered node is flagged as deleted
+    const_iterator find(const key_type& key) const
+    {
+        // the iterator constructors take a non-const map pointer
+        self_type* self = const_cast<self_type*>(this);
+
+        const internal_size_type bucket_idx = _bkt_num(key);
+        const bucket_type& bkt = buckets_[bucket_idx];
+        node_type* pred = _find_key_internal(bkt, key);
+
+        // key is present in the internal-memory buffer
+        if (pred && _eq(pred->value_.first, key))
+        {
+            n_found_internal++;
+            if (pred->deleted())
+                return this->_end<const_iterator>();
+            return const_iterator(self, bucket_idx, pred, 0, src_internal, false, key);
+        }
+
+        // otherwise search the external part of the bucket
+        tuple<external_size_type, value_type> hit = _find_key_external(bkt, key);
+        const external_size_type ext_pos = hit.first;
+        value_type ext_value = hit.second;
+
+        // not present in external memory either
+        if (!(ext_pos < bkt.n_external_ && _eq(ext_value.first, key)))
+        {
+            n_not_found++;
+            return this->_end<const_iterator>();
+        }
+
+        n_found_external++;
+        return const_iterator(self, bucket_idx, pred, ext_pos, src_external, true, key);
+    }
+
+    //! Number of values with given key
+    //! \param k key for value to look up
+    //! \return 0 or 1 depending on the presence of a value with the given key
+    external_size_type count(const key_type& k) const
+    {
+        const_iterator pos = find(k);
+        if (pos == end())
+            return 0;
+        return 1;
+    }
+
+    //! Finds a range containing all values with given key. Non-const access
+    //! \param key key to look for
+    //! \return half-open range [first, second): both iterators equal end()
+    //! if the key is absent, otherwise the range contains exactly the one
+    //! value stored under the key
+    std::pair<iterator, iterator> equal_range(const key_type& key)
+    {
+        iterator first = find(key);
+        iterator last = first;
+
+        // a hit has to yield a one-element range, so the upper bound is the
+        // successor of the found position; returning (first, first) would
+        // always describe an empty range
+        if (!(last == end()))
+            ++last;
+
+        return std::pair<iterator, iterator>(first, last);
+    }
+
+    //! Finds a range containing all values with given key. Const access
+    //! \param key key to look for
+    //! \return half-open range [first, second): both iterators equal end()
+    //! if the key is absent, otherwise the range contains exactly the one
+    //! value stored under the key
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const
+    {
+        const_iterator first = find(key);
+        const_iterator last = first;
+
+        // a hit has to yield a one-element range, so the upper bound is the
+        // successor of the found position; returning (first, first) would
+        // always describe an empty range
+        if (!(last == end()))
+            ++last;
+
+        return std::pair<const_iterator, const_iterator>(first, last);
+    }
+
+    //! Convenience operator to quickly insert or find values. Use with caution
+    //! since using this operator will check external-memory.
+    //! If the key is unknown, a default-constructed mapped value is inserted
+    //! and counted as a new element. If the key only exists in external
+    //! memory, its value is copied into a new buffer node.
+    //! \param key key of the value to access
+    //! \return reference to the mapped value inside the internal buffer node
+    mapped_type& operator [] (const key_type& key)
+    {
+        if (buffer_size_ + 1 >= max_buffer_size_)       // (*)
+            _rebuild_buckets();
+
+        internal_size_type i_bucket = _bkt_num(key);
+        bucket_type& bucket = buckets_[i_bucket];
+        node_type* node = _find_key_internal(bucket, key);
+
+        // found in internal-memory buffer
+        if (node && _eq(node->value_.first, key)) {
+            // a node flagged as deleted is revived with a default value
+            if (node->deleted()) {
+                node->set_deleted(false);
+                node->value_.second = mapped_type();
+                ++num_total_;
+            }
+            return node->value_.second;
+        }
+        // search external elements
+        else {
+            tuple<external_size_type, value_type> result
+                = _find_key_external(bucket, key);
+
+            external_size_type i_external = result.first;
+            value_type found_value = result.second;
+
+            const bool found_external =
+                (i_external < bucket.n_external_ && _eq(found_value.first, key));
+
+            value_type buffer_value =
+                found_external
+                ? found_value
+                : value_type(key, mapped_type());
+
+            // add a new node to the buffer. this new node's value overwrites
+            // the external value if it was found and otherwise is set to (key,
+            // mapped_type())
+            node_type* new_node =
+                node
+                ? node->set_next(_new_node(buffer_value, node->next(), false))
+                : (bucket.list_ = _new_node(buffer_value, bucket.list_, false));
+
+            ++buffer_size_;
+            // note that we already checked the buffer-size in (*)
+
+            if (found_external) {
+                // same situation as in find(): the external value is now
+                // shadowed by a buffer node, so iterators registered for it
+                // have to be redirected to that node
+                iterator_map_.fix_iterators_2int(i_bucket, buffer_value.first, new_node);
+            }
+            else {
+                // the key did not exist anywhere, so this is a real
+                // insertion and has to be counted (the revive-branch above
+                // counts as well)
+                ++num_total_;
+            }
+
+            return new_node->value_.second;
+        }
+    }
+
+    //! Number of buckets currently held by the map
+    internal_size_type bucket_count() const
+    {
+        return buckets_.size();
+    }
+
+    //! Maximum number of buckets: max_size() divided by the number of
+    //! values per subblock
+    internal_size_type max_bucket_count() const
+    {
+        return (internal_size_type)(max_size() / subblock_size);
+    }
+
+    //! Bucket-index for values with given key (for the current number of
+    //! buckets).
+    internal_size_type bucket_index(const key_type& k) const
+    {
+        return _bkt_num(k);
+    }
+
+public:
+    //! Average number of (sub)blocks occupied by a bucket: number of
+    //! elements divided by (values per subblock * number of buckets).
+    float load_factor() const
+    {
+        const float values_in_one_subblock_per_bucket =
+            (float)subblock_size * (float)buckets_.size();
+        return (float)num_total_ / values_in_one_subblock_per_bucket;
+    }
+
+    //! Get desired load-factor
+    float opt_load_factor() const
+    {
+        return opt_load_factor_;
+    }
+
+    //! Set desired load-factor. If the current load-factor exceeds the new
+    //! target, the buckets are rebuilt immediately.
+    //! \param z new desired load-factor
+    void opt_load_factor(float z)
+    {
+        opt_load_factor_ = z;
+
+        const bool overloaded = (load_factor() > opt_load_factor_);
+        if (overloaded)
+            _rebuild_buckets();
+    }
+
+    //! Rehash with (at least) n buckets; simply forwards to
+    //! _rebuild_buckets().
+    //! \param n desired number of buckets (0 = derive from load-factor only)
+    void rehash(internal_size_type n = 0)
+    { _rebuild_buckets(n); }
+
+    //! Number of bytes occupied by buffer
+    internal_size_type buffer_size() const
+    {
+        // internally the buffer size is kept as a node count; convert to bytes
+        const internal_size_type bytes_per_node = sizeof(node_type);
+        return buffer_size_ * bytes_per_node;
+    }
+
+    //! Maximum buffer size in byte
+    internal_size_type max_buffer_size() const
+    {
+        // the limit is stored as a node count; convert to bytes
+        const internal_size_type bytes_per_node = sizeof(node_type);
+        return max_buffer_size_ * bytes_per_node;
+    }
+
+    //! Set maximum buffer size. The byte value is converted to a number of
+    //! nodes (rounded down); if the buffer already holds that many nodes or
+    //! more, the buckets are rebuilt.
+    //! \param buffer_size new size in byte
+    void max_buffer_size(internal_size_type buffer_size)
+    {
+        max_buffer_size_ = buffer_size / sizeof(node_type);
+
+        const bool buffer_full = (buffer_size_ >= max_buffer_size_);
+        if (buffer_full)
+            _rebuild_buckets();
+    }
+
+protected:
+    //! iterator pointing to the beginning of the hash-map (iterator-type as
+    //! template-parameter); equals _end() if there are no buckets
+    template <class Iterator>
+    Iterator _begin() const
+    {
+        if (buckets_.size() == 0)
+            return _end<Iterator>();
+
+        // the iterator constructors take a non-const map pointer
+        self_type* self = const_cast<self_type*>(this);
+
+        // start in front of the first bucket and let find_next() advance to
+        // the first value; find_next() also sets the correct key
+        Iterator first(self, 0, buckets_[0].list_,
+                       0, src_unknown, true, key_type());
+        first.find_next();
+
+        return first;
+    }
+
+    //! iterator pointing to the end of the hash-map (iterator-type as
+    //! template-parameter); built from the map pointer alone
+    template <class Iterator>
+    Iterator _end() const
+    {
+        // the iterator constructor takes a non-const map pointer
+        return Iterator(const_cast<self_type*>(this));
+    }
+
+public:
+    //! Returns an iterator pointing to the beginning of the hash-map
+    iterator begin()
+    {
+        return _begin<iterator>();
+    }
+
+    //! Returns a const_iterator pointing to the beginning of the hash-map
+    const_iterator begin() const
+    {
+        return _begin<const_iterator>();
+    }
+
+    //! Returns an iterator pointing to the end of the hash-map
+    iterator end()
+    {
+        return _end<iterator>();
+    }
+
+    //! Returns a const_iterator pointing to the end of the hash-map
+    const_iterator end() const
+    {
+        return _end<const_iterator>();
+    }
+
+protected:
+    //! Allocate storage for one new buffer-node from the node allocator
+    node_type * _get_node()
+    {
+        node_type* storage = node_allocator_.allocate(1);
+        return storage;
+    }
+
+    //! Free given node by returning its storage to the node allocator
+    //! \param node node previously obtained from _get_node()
+    void _put_node(node_type* node)
+    { node_allocator_.deallocate(node, 1); }
+
+    //! Allocate a new buffer-node and initialize it.
+    //! \param value value to store in the node
+    //! \param nxt successor of the new node
+    //! \param del initial state of the deleted-flag
+    //! \return the freshly initialized node
+    node_type * _new_node(const value_type& value, node_type* nxt, bool del)
+    {
+        node_type* fresh = _get_node();
+
+        // value first, then successor, then deleted-flag
+        fresh->value_ = value;
+        fresh->set_next(nxt);
+        fresh->set_deleted(del);
+
+        return fresh;
+    }
+
+    //! Free nodes in range [first, last). If last is NULL all nodes will be
+    //! freed.
+    void _erase_nodes(node_type* first, node_type* last)
+    {
+        for (node_type* victim = first; victim != last; )
+        {
+            // fetch the successor before the node is given back
+            node_type* following = victim->next();
+            _put_node(victim);
+            victim = following;
+        }
+    }
+
+    //! Bucket-index for values with given key, based on the current number
+    //! of buckets
+    internal_size_type _bkt_num(const key_type& key) const
+    {
+        const internal_size_type n_buckets = buckets_.size();
+        return _bkt_num(key, n_buckets);
+    }
+
+    /*!
+     * Bucket-index for values with given key. The total number of buckets has
+     * to be specified as well.  The bucket number is determined by
+     * \f$ bucket\_num = (hash/max\_hash) \cdot n \f$, where max_hash is the
+     * largest value representable by internal_size_type; the computation is
+     * carried out in double arithmetic.
+     *
+     * The quotient hash/max_hash equals exactly 1.0 if hash == max_hash, and,
+     * for 64-bit hashes, also for hashes slightly below max_hash because
+     * both values round to the same double. Without a guard this would yield
+     * index n, one past the last bucket, so the result is clamped to n-1.
+     * Clamping keeps the mapping monotone in the hash value.
+     *
+     * \param key key whose bucket is requested
+     * \param n total number of buckets
+     * \return index in [0, n) for n > 0; 0 for n == 0
+     */
+    internal_size_type _bkt_num(const key_type& key, internal_size_type n) const
+    {
+        //! TODO maybe specialize double arithmetic to integer. the old code
+        //! was faulty -tb.
+        const double max_hash = (double)std::numeric_limits<internal_size_type>::max();
+        const double scaled = (double)n * ((double)hash_(key) / max_hash);
+
+        // never hand out the out-of-range index n
+        if (n != 0 && scaled >= (double)n)
+            return n - 1;
+
+        return (internal_size_type)scaled;
+    }
+
+    /*!
+     * Locate the given key in the internal-memory chained list of a bucket.
+     * The list is walked while the current key is <= the given key, so the
+     * result is the node holding the key if it is present, and otherwise the
+     * node with the biggest key smaller than the given key.  The result may
+     * be NULL: either because the chained list is empty or because the given
+     * key is smaller than all keys in the chained list.
+     */
+    node_type*
+    _find_key_internal(const bucket_type& bucket, const key_type& key) const
+    {
+        node_type* last_not_greater = NULL;
+        node_type* cursor = bucket.list_;
+
+        while (cursor && _leq(cursor->value_.first, key))
+        {
+            last_not_greater = cursor;
+            cursor = cursor->next();
+        }
+
+        return last_not_greater;
+    }
+
+    /*!
+     * Search for key in external part of bucket. Return value is (i_external,
+     * value), where i_external == bucket.n_external_ (and value is
+     * default-constructed) if the key could not be found.
+     */
+    tuple<external_size_type, value_type>
+    _find_key_external(const bucket_type& bucket, const key_type& key) const
+    {
+        // number of subblocks occupied by bucket (rounded up)
+        internal_size_type n_subblocks =
+            (internal_size_type)(bucket.n_external_ / subblock_size);
+        if (bucket.n_external_ % subblock_size != 0)
+            ++n_subblocks;
+
+        for (internal_size_type sb = 0; sb < n_subblocks; ++sb)
+        {
+            subblock_type* subblock = _load_subblock(bucket, sb);
+
+            // number of values in this subblock: all subblocks are full
+            // except possibly the last one
+            internal_size_type n_values;
+            if (sb + 1 < n_subblocks)
+                n_values = (internal_size_type)subblock_size;
+            else
+                n_values = (internal_size_type)(
+                    bucket.n_external_ - sb * subblock_size
+                    );
+
+            //! TODO: replace with bucket.n_external_ % subblock_size
+
+            // biggest key in current subblock still too small => next subblock
+            if (_lt((*subblock)[n_values - 1].first, key))
+                continue;
+
+            // binary search in current subblock; afterwards lo is the last
+            // position whose key is <= key (or 0 if there is none)
+            internal_size_type lo = 0, hi = n_values;
+            while (lo + 1 != hi)
+            {
+                const internal_size_type mid = (lo + hi) / 2;
+                if (_leq((*subblock)[mid].first, key))
+                    lo = mid;
+                else
+                    hi = mid;
+            }
+
+            value_type candidate = (*subblock)[lo];
+
+            // the key can only live in this subblock, so the search ends
+            // here in either case
+            if (!_eq(candidate.first, key))
+                return tuple<external_size_type, value_type>
+                           (bucket.n_external_, value_type());
+
+            return tuple<external_size_type, value_type>
+                       (sb * subblock_size + lo, candidate);
+        }
+
+        // every subblock's biggest key was smaller than the given key
+        return tuple<external_size_type, value_type>
+                   (bucket.n_external_, value_type());
+    }
+
+    /*!
+     * Load the given bucket's i-th subblock through the block cache.
+     * Since a bucket may be spread over several blocks, we must determine
+     * 1. in which block the requested subblock is located, and
+     * 2. at which position within that block the subblock is located.
+     * Every call is counted in n_subblocks_loaded.
+     */
+    subblock_type*
+    _load_subblock(const bucket_type& bucket, internal_size_type which_subblock) const
+    {
+        n_subblocks_loaded++;
+
+        // position of the requested subblock, counted from the start of the
+        // bucket's first block
+        const external_size_type abs_pos = bucket.i_subblock_ + which_subblock;
+
+        /* 1. block holding the subblock */
+        const internal_size_type block_offset =
+            (internal_size_type)(abs_pos / subblocks_per_block);
+        bid_type bid = bids_[bucket.i_block_ + block_offset];
+
+        /* 2. position of the subblock inside that block */
+        const internal_size_type pos_in_block =
+            (internal_size_type)(abs_pos % subblocks_per_block);
+
+        return block_cache_.get_subblock(bid, pos_in_block);
+    }
+
+    typedef HashedValue<self_type> hashed_value_type;
+
+    //! Functor that extracts the actual value from a HashedValue-struct
+    struct HashedValueExtractor
+    {
+        //! \return reference to the value_ member of the given hashed value
+        value_type& operator () (hashed_value_type& hvalue)
+        {
+            return hvalue.value_;
+        }
+    };
+
+    /*!
+     * Will return from its input-stream all values that are to be stored in
+     * the given bucket.  Those values must appear in consecutive order
+     * beginning with the input-stream's current value.  The stream reports
+     * empty as soon as the input is exhausted or its current value belongs
+     * to a different bucket; bucket_size_ counts the values delivered.
+     */
+    template <class InputStream, class ValueExtractor>
+    struct HashingStream
+    {
+        typedef typename InputStream::value_type value_type;
+
+        self_type* map_;                    //!< map used to compute bucket numbers
+        InputStream& input_;                //!< underlying stream (shared, not owned)
+        internal_size_type i_bucket_;       //!< bucket whose values are delivered
+        external_size_type bucket_size_;    //!< number of values accepted so far
+        value_type value_;                  //!< copy of the current input value
+        bool empty_;                        //!< result of the last find_next()
+        ValueExtractor vextract_;           //!< yields the (key, mapped) pair of a stream value
+
+        HashingStream(InputStream& input, internal_size_type i_bucket,
+                      ValueExtractor vextract, self_type* map)
+            : map_(map),
+              input_(input),
+              i_bucket_(i_bucket),
+              bucket_size_(0),
+              vextract_(vextract)
+        {
+            // must run in the body: find_next() uses vextract_, which is
+            // declared (and therefore initialized) after empty_
+            empty_ = find_next();
+        }
+
+        //! current value (a copy of the input stream's value)
+        const value_type& operator * () { return value_; }
+
+        //! true if no further value for this bucket is available
+        bool empty() const { return empty_; }
+
+        //! advance the input stream and examine its next value
+        void operator ++ ()
+        {
+            ++input_;
+            empty_ = find_next();
+        }
+
+        //! Examine the input stream's current value.
+        //! \return false if it belongs to this bucket (it is then cached in
+        //! value_ and counted), true if the input is exhausted or the value
+        //! belongs to another bucket
+        bool find_next()
+        {
+            if (!input_.empty())
+            {
+                value_ = *input_;
+                if (map_->_bkt_num(vextract_(value_).first) == i_bucket_)
+                {
+                    ++bucket_size_;
+                    return false;
+                }
+            }
+            return true;
+        }
+    };
+
+    /*!
+     * Rebuild hash-map: all values (buffered and external) are streamed in
+     * order and written to freshly allocated blocks, distributed among a new
+     * set of buckets.  Afterwards the internal buffer is empty, num_total_
+     * holds the number of values actually written and the map is no longer
+     * oblivious.
+     *
+     * \param n_desired desired number of buckets; only honoured if it is
+     * larger than the number derived from the optimal load-factor.  At least
+     * one bucket is always created, because lookups and insertions index the
+     * bucket array unconditionally.
+     */
+    void _rebuild_buckets(internal_size_type n_desired = 0)
+    {
+        STXXL_VERBOSE_HASH_MAP("_rebuild_buckets()");
+
+        typedef buffered_writer<block_type, bid_container_type> writer_type;
+        typedef HashedValuesStream<self_type, reader_type> values_stream_type;
+        typedef HashingStream<values_stream_type, HashedValueExtractor> hashing_stream_type;
+
+        const int_type write_buffer_size = config::get_instance()->disks_number() * 4;
+
+        // determine new number of buckets from desired load_factor ...
+        internal_size_type n_new;
+        n_new = (internal_size_type)ceil((double)num_total_ / ((double)subblock_size * (double)opt_load_factor()));
+
+        // ... but give the user the chance to request even more buckets
+        if (n_desired > n_new)
+            n_new = std::min<internal_size_type>(n_desired, max_bucket_count());
+
+        // an empty map (num_total_ == 0) would otherwise end up with zero
+        // buckets, and the next find/insert would index an empty array
+        if (n_new == 0)
+            n_new = 1;
+
+        // allocate new buckets and bids
+        buckets_container_type old_buckets(n_new);
+        std::swap(buckets_, old_buckets);
+
+        bid_container_type old_bids;
+        std::swap(bids_, old_bids);
+
+        // read stored values in consecutive order
+
+        // use new to control point of destruction (see below)
+        reader_type* reader
+            = new reader_type(old_bids.begin(), old_bids.end(), block_cache_);
+
+        values_stream_type values_stream(old_buckets.begin(), old_buckets.end(),
+                                         *reader, old_bids.begin(), *this);
+
+        writer_type writer(&bids_, write_buffer_size, write_buffer_size / 2);
+
+        // re-distribute values among new buckets.
+
+        // this makes use of the fact that if value1 precedes value2 before
+        // resizing, value1 will precede value2 after resizing as well (uniform
+        // rehashing)
+        num_total_ = 0;
+        for (internal_size_type i_bucket = 0;
+             i_bucket < buckets_.size(); i_bucket++)
+        {
+            // the bucket starts at the writer's current position
+            buckets_[i_bucket] = bucket_type();
+            buckets_[i_bucket].i_block_ = writer.i_block();
+            buckets_[i_bucket].i_subblock_ = writer.i_subblock();
+
+            hashing_stream_type hasher(values_stream, i_bucket, HashedValueExtractor(), this);
+            external_size_type i_ext = 0;
+            while (!hasher.empty())
+            {
+                const hashed_value_type& hvalue = *hasher;
+                // iterators registered for this value follow it to its new
+                // external position
+                iterator_map_.fix_iterators_2ext(hvalue.i_bucket_, hvalue.value_.first, i_bucket, i_ext);
+
+                writer.append(hvalue.value_);
+                ++hasher;
+                ++i_ext;
+            }
+
+            writer.finish_subblock();
+            buckets_[i_bucket].n_external_ = hasher.bucket_size_;
+            num_total_ += hasher.bucket_size_;
+        }
+        writer.flush();
+        // reader must be deleted before deleting old_bids because its
+        // destructor will dereference the bid-iterator
+        delete reader;
+        block_cache_.clear();
+
+        // get rid of old blocks and buckets
+        block_manager* bm = stxxl::block_manager::get_instance();
+        bm->delete_blocks(old_bids.begin(), old_bids.end());
+
+        for (internal_size_type i_bucket = 0;
+             i_bucket < old_buckets.size(); i_bucket++)
+        {
+            _erase_nodes(old_buckets[i_bucket].list_, NULL);
+            old_buckets[i_bucket] = bucket_type();
+        }
+
+        buffer_size_ = 0;
+        oblivious_ = false;
+    }
+
+    /*!
+     * Stream for filtering duplicates. Used to eliminate values with
+     * duplicate keys when bulk-inserting.  The input delivers
+     * (hash, (key, mapped)) pairs and has to be sorted by <hash, key>, so
+     * that duplicates occur in a row; of each run only the first value is
+     * delivered.
+     */
+    template <class InputStream>
+    struct UniqueValueStream
+    {
+        typedef typename InputStream::value_type value_type;
+        self_type& map_;        //!< map providing the key comparison
+        InputStream& in_;       //!< sorted input stream (shared, not owned)
+
+        UniqueValueStream(InputStream& input, self_type& map)
+            : map_(map), in_(input)
+        { }
+
+        bool empty() const { return in_.empty(); }
+
+        const value_type& operator * () { return *in_; }
+
+        //! Advance past the current value and every following value with the
+        //! same key.
+        void operator ++ ()
+        {
+            value_type v_old = *in_;
+            ++in_;
+            // .first is only the hash; two different keys may share it, so
+            // the keys themselves have to be equal before a value is dropped
+            // as a duplicate
+            while (!in_.empty() &&
+                   v_old.first == (*in_).first &&
+                   map_._eq(v_old.second.first, (*in_).second.first))
+                ++in_;
+        }
+    };
+
+    //! Stream adapter that pairs every input value with the hash of its key,
+    //! computed with the map's hash functor.
+    template <class InputStream>
+    struct AddHashStream
+    {
+        //! (hash,value)
+        typedef std::pair<internal_size_type, typename InputStream::value_type> value_type;
+        self_type& map_;        //!< map providing the hash functor
+        InputStream& in_;       //!< underlying stream (shared, not owned)
+
+        AddHashStream(InputStream& input, self_type& map)
+            : map_(map), in_(input)
+        { }
+
+        bool empty() const
+        {
+            return in_.empty();
+        }
+
+        //! current input value together with the hash of its key
+        value_type operator * ()
+        {
+            return value_type(map_.hash_((*in_).first), *in_);
+        }
+
+        void operator ++ ()
+        {
+            ++in_;
+        }
+    };
+
+    /*!
+     * Extracts the value-part of a (hash, value) pair, ignoring the
+     * hash-value; required by HashingStream (see above)
+     */
+    struct StripHashFunctor
+    {
+        const value_type& operator () (std::pair<internal_size_type, value_type>& v)
+        {
+            return v.second;
+        }
+    };
+
+    /*!
+     * Comparator object for (hash, value) pairs as required by stxxl::sort.
+     * Sorting is done lexicographically by <hash-value, key>; keys are
+     * compared with the map's cmp_ functor.  Note: the hash-value has
+     * already been computed and is taken from the pair.
+     */
+    struct Cmp : public std::binary_function<
+                     std::pair<internal_size_type, value_type>,
+                     std::pair<internal_size_type, value_type>, bool
+                     >
+    {
+        self_type& map_;
+        Cmp(self_type& map) : map_(map) { }
+
+        //! true iff a precedes b in <hash-value, key> order
+        bool operator () (const std::pair<internal_size_type, value_type>& a,
+                          const std::pair<internal_size_type, value_type>& b) const
+        {
+            // different hashes decide immediately
+            if (a.first != b.first)
+                return a.first < b.first;
+
+            // equal hashes: fall back to the key order
+            return map_.cmp_(a.second.first, b.second.first);
+        }
+
+        //! smallest hash combined with the key comparator's min_value()
+        std::pair<internal_size_type, value_type> min_value() const
+        {
+            const internal_size_type lowest_hash = std::numeric_limits<internal_size_type>::min();
+            return std::pair<internal_size_type, value_type>(
+                lowest_hash,
+                value_type(map_.cmp_.min_value(), mapped_type())
+                );
+        }
+
+        //! largest hash combined with the key comparator's max_value()
+        std::pair<internal_size_type, value_type> max_value() const
+        {
+            const internal_size_type highest_hash = std::numeric_limits<internal_size_type>::max();
+            return std::pair<internal_size_type, value_type>(
+                highest_hash,
+                value_type(map_.cmp_.max_value(), mapped_type())
+                );
+        }
+    };
+
+public:
+    //! Bulk-insert of values in the range [f, l).
+    //! The new values are sorted by <hash-value, key>, stripped of duplicate
+    //! keys and merged with the values already stored while the whole map is
+    //! rewritten into a new set of buckets.  If a key exists both in the map
+    //! and in the range, the value from the range replaces the stored one.
+    //! \param f beginning of the range
+    //! \param l end of the range
+    //! \param mem internal memory that may be used (note: this memory will be used additionally to the buffer). The more the better
+    template <class InputIterator>
+    void insert(InputIterator f, InputIterator l, internal_size_type mem)
+    {
+        //! values already stored in the hashtable ("old values")
+        typedef HashedValuesStream<self_type, reader_type> old_values_stream;
+        //! old values, that are to be stored in a certain (new) bucket
+        typedef HashingStream<old_values_stream, HashedValueExtractor> old_hashing_stream;
+
+        //! values to insert ("new values")
+        typedef typename stxxl::stream::streamify_traits<InputIterator>::stream_type input_stream;
+
+        //! new values with added hash: (hash, (key, mapped))
+        typedef AddHashStream<input_stream> new_values_stream;
+        //! new values sorted by <hash-value, key>
+        typedef stxxl::stream::sort<new_values_stream, Cmp> new_sorted_values_stream;
+        //! new values sorted by <hash-value, key> with duplicates eliminated
+        typedef UniqueValueStream<new_sorted_values_stream> new_unique_values_stream;
+        //! new values, that are to be stored in a certain bucket
+        typedef HashingStream<new_unique_values_stream, StripHashFunctor> new_hashing_stream;
+
+        typedef buffered_writer<block_type, bid_container_type> writer_type;
+
+        int_type write_buffer_size = config::get_instance()->disks_number() * 2;
+
+        // calculate new number of buckets
+        external_size_type num_total_new = num_total_ + (l - f);         // estimated number of elements
+        external_size_type n_buckets_new = (external_size_type)ceil((double)num_total_new / ((double)subblock_size * (double)opt_load_factor()));
+        if (n_buckets_new > max_bucket_count())
+            n_buckets_new = max_bucket_count();
+        // inserting an empty range into an empty map would otherwise leave
+        // the map without any bucket, and later lookups index the bucket
+        // array unconditionally
+        if (n_buckets_new == 0)
+            n_buckets_new = 1;
+
+        STXXL_VERBOSE_HASH_MAP("insert() items=" << (l - f) << " buckets_new=" << n_buckets_new);
+
+        // prepare new buckets and bids
+        buckets_container_type old_buckets((internal_size_type)n_buckets_new);
+        std::swap(buckets_, old_buckets);
+        // writer will allocate new blocks as necessary
+        bid_container_type old_bids;
+        std::swap(bids_, old_bids);
+
+        // already stored values ("old values")
+        reader_type* reader = new reader_type(old_bids.begin(), old_bids.end(),
+                                              block_cache_);
+        old_values_stream old_values(old_buckets.begin(), old_buckets.end(),
+                                     *reader, old_bids.begin(), *this);
+
+        // values to insert ("new values")
+        input_stream input = stxxl::stream::streamify(f, l);
+        new_values_stream new_values(input, *this);
+        new_sorted_values_stream new_sorted_values(new_values, Cmp(*this), mem);
+        new_unique_values_stream new_unique_values(new_sorted_values, *this);
+
+        writer_type writer(&bids_, write_buffer_size, write_buffer_size / 2);
+
+        num_total_ = 0;
+        for (internal_size_type i_bucket = 0; i_bucket < buckets_.size(); i_bucket++)
+        {
+            // the bucket starts at the writer's current position
+            buckets_[i_bucket] = bucket_type();
+            buckets_[i_bucket].i_block_ = writer.i_block();
+            buckets_[i_bucket].i_subblock_ = writer.i_subblock();
+
+            old_hashing_stream old_hasher(old_values, i_bucket, HashedValueExtractor(), this);
+            new_hashing_stream new_hasher(new_unique_values, i_bucket, StripHashFunctor(), this);
+            internal_size_type bucket_size = 0;
+
+            // more old and new values for the current bucket => choose smallest
+            while (!old_hasher.empty() && !new_hasher.empty())
+            {
+                internal_size_type old_hash = hash_((*old_hasher).value_.first);
+                internal_size_type new_hash = (*new_hasher).first;
+                key_type old_key = (*old_hasher).value_.first;
+                key_type new_key = (*new_hasher).second.first;
+
+                // old value wins (it is smaller in <hash-value, key> order)
+                if ((old_hash < new_hash) || (old_hash == new_hash && cmp_(old_key, new_key)))
+                {
+                    const hashed_value_type& hvalue = *old_hasher;
+                    iterator_map_.fix_iterators_2ext(hvalue.i_bucket_, hvalue.value_.first, i_bucket, bucket_size);
+                    writer.append(hvalue.value_);
+                    ++old_hasher;
+                }
+                // new value smaller or equal => new value wins
+                else
+                {
+                    // same key: the old value is replaced; iterators on it
+                    // are moved to the position the new value is written to
+                    if (_eq(old_key, new_key))
+                    {
+                        const hashed_value_type& hvalue = *old_hasher;
+                        iterator_map_.fix_iterators_2ext(hvalue.i_bucket_, hvalue.value_.first, i_bucket, bucket_size);
+                        ++old_hasher;
+                    }
+                    writer.append((*new_hasher).second);
+                    ++new_hasher;
+                }
+                ++bucket_size;
+            }
+            // no more new values for the current bucket
+            while (!old_hasher.empty())
+            {
+                const hashed_value_type& hvalue = *old_hasher;
+                iterator_map_.fix_iterators_2ext(hvalue.i_bucket_, hvalue.value_.first, i_bucket, bucket_size);
+                writer.append(hvalue.value_);
+                ++old_hasher;
+                ++bucket_size;
+            }
+            // no more old values for the current bucket
+            while (!new_hasher.empty())
+            {
+                writer.append((*new_hasher).second);
+                ++new_hasher;
+                ++bucket_size;
+            }
+
+            writer.finish_subblock();
+            buckets_[i_bucket].n_external_ = bucket_size;
+            num_total_ += bucket_size;
+        }
+        writer.flush();
+        // the reader is destroyed before the old blocks are released
+        delete reader;
+        block_cache_.clear();
+
+        // release old blocks
+        block_manager* bm = stxxl::block_manager::get_instance();
+        bm->delete_blocks(old_bids.begin(), old_bids.end());
+
+        // free nodes in old bucket lists
+        for (internal_size_type i_bucket = 0;
+             i_bucket < old_buckets.size(); i_bucket++)
+        {
+            _erase_nodes(old_buckets[i_bucket].list_, NULL);
+            old_buckets[i_bucket] = bucket_type();
+        }
+
+        buffer_size_ = 0;
+        oblivious_ = false;
+    }
+
+protected:
+    /* true iff a < b.
+       Keys are ordered lexicographically by (hash-value, key): hash_ is compared first, cmp_ breaks ties.
+    */
+    bool _lt(const key_type& a, const key_type& b) const
+    {
+        internal_size_type hash_a = hash_(a);
+        internal_size_type hash_b = hash_(b);
+
+        return (hash_a < hash_b) ||
+               ((hash_a == hash_b) && cmp_(a, b));
+    }
+
+    //! true iff a > b in the (hash-value, key) order; evaluated as _lt(b, a)
+    bool _gt(const key_type& a, const key_type& b) const { return _lt(b, a); }
+    //! true iff a <= b in the (hash-value, key) order; evaluated as "not a > b"
+    bool _leq(const key_type& a, const key_type& b) const { return !_gt(a, b); }
+    //! true iff a >= b in the (hash-value, key) order; evaluated as "not a < b"
+    bool _geq(const key_type& a, const key_type& b) const { return !_lt(a, b); }
+
+    //! true iff a == b, i.e. cmp_ orders neither key before the other. note: it is
+    //! mandatory that equal keys yield equal hash-values => hashing is not necessary for equality-testing.
+    bool _eq(const key_type& a, const key_type& b) const
+    { return !cmp_(a, b) && !cmp_(b, a); }
+
+    friend class hash_map_iterator_base<self_type>;
+    friend class hash_map_iterator<self_type>;
+    friend class hash_map_const_iterator<self_type>;
+    friend class iterator_map<self_type>;
+    friend class block_cache<block_type>;
+    friend struct HashedValuesStream<self_type, reader_type>;
+
+#if 1
+    void _dump_external()  // debug helper: walks all blocks in bids_ and prints the .first member of each value read to std::cout
+    {
+        reader_type reader(bids_.begin(), bids_.end(), &block_cache_);
+
+        for (internal_size_type i_block = 0; i_block < bids_.size(); i_block++) {
+            std::cout << "block " << i_block << ":\n";
+
+            for (internal_size_type i_subblock = 0; i_subblock < subblocks_per_block; i_subblock++) {
+                std::cout << "  subblock " << i_subblock << ":\n    ";
+
+                for (external_size_type i_element = 0; i_element < subblocks_per_block; i_element++) {  // NOTE(review): element loop is bounded by subblocks_per_block, not by an elements-per-subblock count -- confirm this is intended
+                    std::cout << reader.const_value().first << ", ";
+                    ++reader;  // the single reader advances sequentially across elements, subblocks and blocks
+                }
+                std::cout << std::endl;
+            }
+        }
+    }
+
+    void _dump_buckets()  // debug helper: for every bucket prints its location, its internal node list and its external values to std::cout
+    {
+        reader_type reader(bids_.begin(), bids_.end(), &block_cache_);
+
+        std::cout << "number of buckets: " << buckets_.size() << std::endl;
+        for (internal_size_type i_bucket = 0; i_bucket < buckets_.size(); i_bucket++) {
+            const bucket_type& bucket = buckets_[i_bucket];
+            reader.skip_to(bids_.begin() + bucket.i_block_, bucket.i_subblock_);  // position the reader at this bucket's (block, subblock)
+
+            std::cout << "  bucket " << i_bucket << ": block=" << bucket.i_block_ << ", subblock=" << bucket.i_subblock_ << ", external=" << bucket.n_external_ << std::endl;
+
+            node_type* node = bucket.list_;  // head of the bucket's internal node list
+            std::cout << "     internal_list=";
+            while (node) {
+                std::cout << node->value_.first << " (del=" << node->deleted() << "), ";  // key plus the node's deleted() flag
+                node = node->next();
+            }
+            std::cout << std::endl;
+
+            std::cout << "     external=";
+            for (external_size_type i_element = 0; i_element < bucket.n_external_; i_element++) {  // exactly n_external_ values are read for this bucket
+                std::cout << reader.const_value().first << ", ";
+                ++reader;
+            }
+            std::cout << std::endl;
+        }
+    }
+
+    void _dump_bucket_statistics()  // debug helper: prints one summary line per bucket to std::cout without reading any external data
+    {
+        std::cout << "number of buckets: " << buckets_.size() << std::endl;
+        for (internal_size_type i_bucket = 0; i_bucket < buckets_.size(); i_bucket++) {
+            const bucket_type& bucket = buckets_[i_bucket];
+            std::cout << "  bucket " << i_bucket << ": block=" << bucket.i_block_ << ", subblock=" << bucket.i_subblock_ << ", external=" << bucket.n_external_ << ", list=" << bucket.list_ << std::endl;  // list= shows the list_ pointer itself, not the list contents
+        }
+    }
+#endif
+
+public:
+    //! Equality predicate built from the map's comparison operator (via _eq). No std::binary_function base: it was removed in C++17 and the typedefs below supply its members.
+    struct equal_to
+    {
+        //! reference to the hash_map whose _eq() is used; the map must outlive this predicate
+        const self_type& m_map;
+
+        //! constructor requires reference to hash_map
+        equal_to(const self_type& map) : m_map(map) { }
+
+        //! return whether the arguments compare equal (x==y) according to m_map._eq.
+        bool operator () (const key_type& x, const key_type& y) const
+        {
+            return m_map._eq(x, y);
+        }
+
+        //! type of the first argument (formerly also provided by std::binary_function)
+        typedef key_type first_argument_type;
+        //! type of the second argument (formerly also provided by std::binary_function)
+        typedef key_type second_argument_type;
+        //! type of the result (formerly also provided by std::binary_function)
+        typedef bool result_type;
+    };
+
+    //! Type of constructed equality predicate
+    typedef equal_to key_equal;
+
+    //! Return an equality predicate bound to this hash-map; it holds a reference to *this
+    key_equal key_eq() const
+    {
+        return equal_to(*this);
+    }
+
+public:
+    //! Even more statistics, written to stream o (std::cout by default): number of buckets, number of values,
+    //! buffer-size, max/avg/std-dev of external values per bucket, load-factor and block usage
+    void print_load_statistics(std::ostream& o = std::cout) const
+    {
+        external_size_type sum_external = 0;
+        external_size_type square_sum_external = 0;
+        external_size_type max_external = 0;
+
+        for (internal_size_type i_bucket = 0; i_bucket < buckets_.size(); i_bucket++)
+        {
+            const bucket_type& b = buckets_[i_bucket];
+
+            sum_external += b.n_external_;
+            square_sum_external += b.n_external_ * b.n_external_;  // sum of squares, needed for the std-dev below
+            if (b.n_external_ > max_external)
+                max_external = b.n_external_;
+        }
+
+        double avg_external = (double)sum_external / (double)buckets_.size();
+        double std_external = sqrt(((double)square_sum_external / (double)buckets_.size()) - (avg_external * avg_external));  // sqrt(mean of squares - square of mean)
+
+        o << "Bucket count         : " << buckets_.size() << std::endl;
+        o << "Values total         : " << num_total_ << std::endl;
+        o << "Values buffered      : " << buffer_size_ << std::endl;
+        o << "Max Buffer-Size      : " << max_buffer_size_ << std::endl;
+        o << "Max external/bucket  : " << max_external << std::endl;
+        o << "Avg external/bucket  : " << avg_external << std::endl;
+        o << "Std external/bucket  : " << std_external << std::endl;
+        o << "Load-factor          : " << load_factor() << std::endl;
+        o << "Blocks allocated     : " << bids_.size() << " => " << (bids_.size() * block_type::raw_size) << " bytes" << std::endl;
+        o << "Bytes per value      : " << ((double)(bids_.size() * block_type::raw_size) / (double)num_total_) << std::endl;  // NOTE(review): no guard for num_total_ == 0 -- confirm a non-finite value is acceptable here
+    }
+};     /* end of class hash_map */
+
+} // namespace hash_map
+
+STXXL_END_NAMESPACE
+
+namespace std {
+
+template <class KeyType, class MappedType, class HashType, class KeyCompareType,
+          unsigned SubBlockSize, unsigned SubBlocksPerBlock, class AllocType>
+void swap(stxxl::hash_map::hash_map<KeyType, MappedType, HashType, KeyCompareType,
+                                    SubBlockSize, SubBlocksPerBlock, AllocType>& a,
+          stxxl::hash_map::hash_map<KeyType, MappedType, HashType, KeyCompareType,
+                                    SubBlockSize, SubBlocksPerBlock, AllocType>& b)
+{   // NOTE(review): this declares a new function-template overload inside namespace std -- confirm that is acceptable
+    if (&a != &b)  // swapping a map with itself is skipped
+        a.swap(b);  // delegates to the member swap
+}
+
+} // namespace std
+
+#endif // !STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
diff --git a/include/stxxl/bits/containers/hash_map/iterator.h b/include/stxxl/bits/containers/hash_map/iterator.h
new file mode 100644
index 0000000..9aa95ae
--- /dev/null
+++ b/include/stxxl/bits/containers/hash_map/iterator.h
@@ -0,0 +1,587 @@
+/***************************************************************************
+ *  include/stxxl/bits/containers/hash_map/iterator.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_CONTAINERS_HASH_MAP_ITERATOR_HEADER
+#define STXXL_CONTAINERS_HASH_MAP_ITERATOR_HEADER
+
+#include <stxxl/bits/namespace.h>
+#include <stxxl/bits/mng/block_manager.h>
+
+#include <stxxl/bits/containers/hash_map/util.h>
+
+STXXL_BEGIN_NAMESPACE
+
+namespace hash_map {
+
+template <class HashMap>
+class iterator_map;
+template <class HashMap>
+class hash_map_iterator;
+template <class HashMap>
+class hash_map_const_iterator;
+template <class HashMap>
+class block_cache;
+
+template <class HashMap>
+class hash_map_iterator_base
+{
+public:
+    friend class iterator_map<HashMap>;
+    friend void HashMap::erase(hash_map_const_iterator<HashMap> it);
+
+    typedef HashMap hash_map_type;
+    typedef typename hash_map_type::internal_size_type internal_size_type;
+    typedef typename hash_map_type::external_size_type external_size_type;
+    typedef typename hash_map_type::value_type value_type;
+    typedef typename hash_map_type::key_type key_type;
+    typedef typename hash_map_type::reference reference;
+    typedef typename hash_map_type::const_reference const_reference;
+    typedef typename hash_map_type::node_type node_type;
+    typedef typename hash_map_type::bucket_type bucket_type;
+    typedef typename hash_map_type::bid_iterator_type bid_iterator_type;
+    typedef typename hash_map_type::source_type source_type;
+
+    typedef buffered_reader<typename hash_map_type::block_cache_type, bid_iterator_type> reader_type;
+
+    typedef std::forward_iterator_tag iterator_category;
+
+protected:
+    HashMap* map_;
+    reader_type* reader_;
+    //! true if prefetching enabled; false by default, will be set to true when
+    //! incrementing (see find_next())
+    bool prefetch_;
+    //! index of current bucket
+    internal_size_type i_bucket_;
+    //! source of current value: external or internal
+    source_type source_;
+    //! current (source=internal) or old (src=external) internal node
+    node_type* node_;
+    //! position of current (source=external) or next (source=internal)
+    //! external value (see _ext_valid)
+    external_size_type i_external_;
+    //! key of current value
+    key_type key_;
+    /*! true if i_external_ points to the current or next external value
+
+      example: iterator was created by hash_map::find() and the value was found
+      in internal memory
+
+      => iterator pointing to internal node is created and location of next
+      external value is unknown (ext_valid_ == false)
+
+      => when incrementing the iterator the external values will be scanned
+      from the beginning of the bucket to find the valid external index
+    */
+    bool ext_valid_;
+    //! true if iterator equals end()
+    bool end_;
+
+public:
+    //! Construct a new iterator
+    hash_map_iterator_base(HashMap* map, internal_size_type i_bucket, node_type* node,
+                           external_size_type i_external, source_type source,
+                           bool ext_valid, key_type key)
+        : map_(map),
+          reader_(NULL),
+          prefetch_(false),
+          i_bucket_(i_bucket),
+          source_(source),
+          node_(node),
+          i_external_(i_external),
+          key_(key),
+          ext_valid_(ext_valid),
+          end_(false)
+    {
+        STXXL_VERBOSE3("hash_map_iterator_base parameter construct addr=" << this);
+        map_->iterator_map_.register_iterator(*this);
+    }
+
+    //! Construct a new iterator pointing to the end of the given hash-map.
+    hash_map_iterator_base(hash_map_type* map)
+        : map_(map),
+          reader_(NULL),
+          prefetch_(false),
+          i_bucket_(0),
+          source_(hash_map_type::src_unknown),
+          node_(NULL),
+          i_external_(0),
+          ext_valid_(false),
+          end_(true)
+    { }
+
+    //! Construct a new iterator from an existing one
+    hash_map_iterator_base(const hash_map_iterator_base& obj)
+        : map_(obj.map_),
+          reader_(NULL),
+          prefetch_(obj.prefetch_),
+          i_bucket_(obj.i_bucket_),
+          source_(obj.source_),
+          node_(obj.node_),
+          i_external_(obj.i_external_),
+          key_(obj.key_),
+          ext_valid_(obj.ext_valid_),
+          end_(obj.end_)
+    {
+        STXXL_VERBOSE3("hash_map_iterator_base constr from" << (&obj) << " to " << this);
+
+        if (!end_ && map_)
+            map_->iterator_map_.register_iterator(*this);
+    }
+
+    //! Assignment operator
+    hash_map_iterator_base& operator = (const hash_map_iterator_base& obj)
+    {
+        STXXL_VERBOSE3("hash_map_iterator_base copy from" << (&obj) << " to " << this);
+
+        if (&obj != this)
+        {
+            if (map_ && !end_)
+                map_->iterator_map_.unregister_iterator(*this);
+
+            reset_reader();
+
+            map_ = obj.map_;
+            i_bucket_ = obj.i_bucket_;
+            node_ = obj.node_;
+            source_ = obj.source_;
+            i_external_ = obj.i_external_;
+            ext_valid_ = obj.ext_valid_;
+            prefetch_ = obj.prefetch_;
+            end_ = obj.end_;
+            key_ = obj.key_;
+
+            if (map_ && !end_)
+                map_->iterator_map_.register_iterator(*this);
+        }
+        return *this;
+    }
+
+    //! Two iterators are equal if they point to the same value in the same map
+    bool operator == (const hash_map_iterator_base& obj) const
+    {
+        if (end_ && obj.end_)
+            return true;
+
+        if (map_ != obj.map_ ||
+            i_bucket_ != obj.i_bucket_ ||
+            source_ != obj.source_)
+            return false;
+
+        if (source_ == hash_map_type::src_internal)
+            return node_ == obj.node_;
+        else
+            return i_external_ == obj.i_external_;
+    }
+
+    bool operator != (const hash_map_iterator_base& obj) const
+    {
+        return ! operator == (obj);
+    }
+
+protected:
+    //! Initialize reader object to scan external values
+    void init_reader()
+    {
+        const bucket_type& bucket = map_->buckets_[i_bucket_];
+
+        bid_iterator_type begin = map_->bids_.begin() + bucket.i_block_;
+        bid_iterator_type end = map_->bids_.end();
+
+        reader_ = new reader_type(begin, end, map_->block_cache_,
+                                  bucket.i_subblock_, prefetch_);
+
+        // external value's index already known
+        if (ext_valid_)
+        {
+            // TODO: speed this up (go directly to i_external_)
+            for (external_size_type i = 0; i < i_external_; i++)
+                ++(*reader_);
+        }
+        // otherwise look up the external value.
+        // case I: no internal value => first external value is the desired one
+        else if (node_ == NULL)
+        {
+            i_external_ = 0;
+            ext_valid_ = true;
+        }
+        // case II: search for smallest external value > internal value
+        else
+        {
+            i_external_ = 0;
+            while (i_external_ < bucket.n_external_)
+            {
+                if (map_->_gt(reader_->const_value().first, node_->value_.first))
+                    break;
+
+                ++(*reader_);
+                ++i_external_;
+            }
+            // note: i_external_ == bucket.n_external_ just means that there was no
+            // external value > internal value (which is perfectly OK)
+            ext_valid_ = true;
+        }
+    }
+
+    //! Reset reader-object
+    void reset_reader()
+    {
+        if (reader_) {
+            delete reader_;
+            reader_ = NULL;
+        }
+    }
+
+public:
+    //! Advance iterator to the next value
+    //! The next value is determined in the following way
+    //!	- if there are remaining internal or external values in the current
+    //!	  bucket, choose the smallest among them, that is not marked as deleted
+    //!	- otherwise continue with the next bucket
+    void find_next(bool start_prefetching = false)
+    {
+        // invariant: current external value is always > current internal value
+        assert(!end_);
+
+        internal_size_type i_bucket_old = i_bucket_;
+        bucket_type bucket = map_->buckets_[i_bucket_];
+
+        if (reader_ == NULL)
+            init_reader();
+
+        // when incremented once, more increments are likely to follow;
+        // therefore start prefetching
+        if (start_prefetching && !prefetch_)
+        {
+            reader_->enable_prefetching();
+            prefetch_ = true;
+        }
+
+        // determine starting-points for comparison, which are given by:
+        // - tmp_node: smallest internal value > old value (tmp_node may be NULL)
+        // - reader_: smallest external value > old value (external value may not exist)
+        node_type* tmp_node = (node_) ? node_ : bucket.list_;
+        if (source_ == hash_map_type::src_external)
+        {
+            while (tmp_node && map_->_leq(tmp_node->value_.first, key_))
+                tmp_node = tmp_node->next();
+
+            ++i_external_;
+            ++(*reader_);
+        }
+        else if (source_ == hash_map_type::src_internal)
+            tmp_node = node_->next();
+        // else (source unknown): tmp_node and reader_ already point to the
+        // correct values
+
+        while (true) {
+            // internal and external values available
+            while (tmp_node && i_external_ < bucket.n_external_)
+            {
+                // internal value less or equal external value => internal wins
+                if (map_->_leq(tmp_node->value_.first, reader_->const_value().first))
+                {
+                    node_ = tmp_node;
+                    if (map_->_eq(node_->value_.first, reader_->const_value().first))
+                    {
+                        ++i_external_;
+                        ++(*reader_);
+                    }
+
+                    if (!node_->deleted())
+                    {
+                        key_ = node_->value_.first;
+                        source_ = hash_map_type::src_internal;
+                        goto end_search;       // just this once - I promise...
+                    }
+                    else
+                        // continue search if internal value is flagged as deleted
+                        tmp_node = tmp_node->next();
+                }
+                // otherwise external wins
+                else
+                {
+                    key_ = reader_->const_value().first;
+                    source_ = hash_map_type::src_external;
+                    goto end_search;
+                }
+            }
+            // only external values left
+            if (i_external_ < bucket.n_external_)
+            {
+                key_ = reader_->const_value().first;
+                source_ = hash_map_type::src_external;
+                goto end_search;
+            }
+            // only internal values left
+            while (tmp_node)
+            {
+                node_ = tmp_node;
+                if (!node_->deleted())
+                {
+                    key_ = node_->value_.first;
+                    source_ = hash_map_type::src_internal;
+                    goto end_search;
+                }
+                else
+                    tmp_node = tmp_node->next();        // continue search
+            }
+
+            // at this point there are obviously no more values in the current
+            // bucket, so let's try the next one (outer while-loop!)
+            i_bucket_++;
+            if (i_bucket_ == map_->buckets_.size())
+            {
+                end_ = true;
+                reset_reader();
+                goto end_search;
+            }
+            else
+            {
+                bucket = map_->buckets_[i_bucket_];
+                i_external_ = 0;
+                tmp_node = bucket.list_;
+                node_ = NULL;
+                reader_->skip_to(map_->bids_.begin() + bucket.i_block_, bucket.i_subblock_);
+            }
+        }
+
+end_search:
+        if (end_)
+        {
+            this->map_->iterator_map_.unregister_iterator(*this, i_bucket_old);
+        }
+        else if (i_bucket_old != i_bucket_)
+        {
+            this->map_->iterator_map_.unregister_iterator(*this, i_bucket_old);
+            this->map_->iterator_map_.register_iterator(*this, i_bucket_);
+        }
+    }
+
+    virtual ~hash_map_iterator_base()
+    {
+        STXXL_VERBOSE3("hash_map_iterator_base deconst " << this);
+
+        if (map_ && !end_)
+            map_->iterator_map_.unregister_iterator(*this);
+        reset_reader();
+    }
+};
+
+template <class HashMap>
+class hash_map_iterator : public hash_map_iterator_base<HashMap>
+{
+public:
+    typedef HashMap hash_map_type;
+    typedef typename hash_map_type::internal_size_type internal_size_type;
+    typedef typename hash_map_type::external_size_type external_size_type;
+    typedef typename hash_map_type::value_type value_type;
+    typedef typename hash_map_type::key_type key_type;
+    typedef typename hash_map_type::reference reference;
+    typedef typename hash_map_type::const_reference const_reference;
+    typedef typename hash_map_type::pointer pointer;
+    typedef typename hash_map_type::const_pointer const_pointer;
+    typedef typename hash_map_type::node_type node_type;
+    typedef typename hash_map_type::bucket_type bucket_type;
+    typedef typename hash_map_type::bid_iterator_type bid_iterator_type;
+    typedef typename hash_map_type::source_type source_type;
+
+    typedef buffered_reader<typename hash_map_type::block_cache_type,
+                            bid_iterator_type> reader_type;
+
+    typedef std::forward_iterator_tag iterator_category;
+
+    typedef stxxl::hash_map::hash_map_iterator_base<hash_map_type> base_type;
+    typedef stxxl::hash_map::hash_map_const_iterator<hash_map_type> hash_map_const_iterator;
+
+public:
+    hash_map_iterator(hash_map_type* map, internal_size_type i_bucket,
+                      node_type* node, external_size_type i_external,
+                      source_type source, bool ext_valid, key_type key)
+        : base_type(map, i_bucket, node, i_external, source, ext_valid, key)
+    { }
+
+    hash_map_iterator()
+        : base_type(NULL)
+    { }
+
+    hash_map_iterator(hash_map_type* map)
+        : base_type(map)
+    { }
+
+    hash_map_iterator(const hash_map_iterator& obj)
+        : base_type(obj)
+    { }
+
+    hash_map_iterator& operator = (const hash_map_iterator& obj)
+    {
+        base_type::operator = (obj);
+        return *this;
+    }
+
+    bool operator == (const hash_map_iterator& obj) const
+    {
+        return base_type::operator == (obj);
+    }
+
+    bool operator == (const hash_map_const_iterator& obj) const
+    {
+        return base_type::operator == (obj);
+    }
+
+    bool operator != (const hash_map_iterator& obj) const
+    {
+        return base_type::operator != (obj);
+    }
+
+    bool operator != (const hash_map_const_iterator& obj) const
+    {
+        return base_type::operator != (obj);
+    }
+
+    //! Return reference to current value. If source is external, mark the
+    //! value's block as dirty
+    reference operator * ()
+    {
+        if (this->source_ == hash_map_type::src_internal)
+        {
+            return this->node_->value_;
+        }
+        else
+        {
+            if (this->reader_ == NULL)
+                base_type::init_reader();
+
+            return this->reader_->value();
+        }
+    }
+
+    //! Return pointer to current value. If source is external, mark the
+    //! value's block as dirty
+    pointer operator -> ()
+    {
+        return &operator * ();
+    }
+
+    //! Increment iterator
+    hash_map_iterator<hash_map_type>& operator ++ ()
+    {
+        base_type::find_next(true);
+        return *this;
+    }
+};
+
+template <class HashMap>
+class hash_map_const_iterator : public hash_map_iterator_base<HashMap>
+{
+public:
+    typedef HashMap hash_map_type;
+    typedef typename hash_map_type::internal_size_type internal_size_type;
+    typedef typename hash_map_type::external_size_type external_size_type;
+    typedef typename hash_map_type::value_type value_type;
+    typedef typename hash_map_type::key_type key_type;
+    typedef typename hash_map_type::reference reference;
+    typedef typename hash_map_type::const_reference const_reference;
+    typedef typename hash_map_type::pointer pointer;
+    typedef typename hash_map_type::const_pointer const_pointer;
+    typedef typename hash_map_type::node_type node_type;
+    typedef typename hash_map_type::bucket_type bucket_type;
+    typedef typename hash_map_type::bid_iterator_type bid_iterator_type;
+    typedef typename hash_map_type::source_type source_type;
+
+    typedef buffered_reader<typename hash_map_type::block_cache_type,
+                            bid_iterator_type> reader_type;
+
+    typedef std::forward_iterator_tag iterator_category;
+
+    typedef stxxl::hash_map::hash_map_iterator_base<hash_map_type> base_type;
+    typedef stxxl::hash_map::hash_map_iterator<hash_map_type> hash_map_iterator;
+
+public:
+    hash_map_const_iterator(hash_map_type* map, internal_size_type i_bucket,
+                            node_type* node, external_size_type i_external,
+                            source_type source, bool ext_valid, key_type key)
+        : base_type(map, i_bucket, node, i_external, source, ext_valid, key)
+    { }
+
+    hash_map_const_iterator()
+        : base_type(NULL)
+    { }
+
+    hash_map_const_iterator(hash_map_type* map)
+        : base_type(map)
+    { }
+
+    hash_map_const_iterator(const hash_map_iterator& obj)
+        : base_type(obj)
+    { }
+
+    hash_map_const_iterator(const hash_map_const_iterator& obj)
+        : base_type(obj)
+    { }
+
+    hash_map_const_iterator& operator = (const hash_map_const_iterator& obj)
+    {
+        base_type::operator = (obj);
+        return *this;
+    }
+
+    bool operator == (const hash_map_const_iterator& obj) const
+    {
+        return base_type::operator == (obj);
+    }
+
+    bool operator == (const hash_map_iterator& obj) const
+    {
+        return base_type::operator == (obj);
+    }
+
+    bool operator != (const hash_map_const_iterator& obj) const
+    {
+        return base_type::operator != (obj);
+    }
+
+    bool operator != (const hash_map_iterator& obj) const
+    {
+        return base_type::operator != (obj);
+    }
+
+    //! Return const-reference to current value
+    const_reference operator * ()
+    {
+        if (this->source_ == hash_map_type::src_internal)
+        {
+            return this->node_->value_;
+        }
+        else
+        {
+            if (this->reader_ == NULL)
+                base_type::init_reader();
+
+            return this->reader_->const_value();
+        }
+    }
+
+    //! Increment iterator
+    hash_map_const_iterator<hash_map_type>& operator ++ ()
+    {
+        base_type::find_next(true);
+        return *this;
+    }
+};
+
+} // namespace hash_map
+
+STXXL_END_NAMESPACE
+
+#endif // !STXXL_CONTAINERS_HASH_MAP_ITERATOR_HEADER
diff --git a/include/stxxl/bits/containers/hash_map/iterator_map.h b/include/stxxl/bits/containers/hash_map/iterator_map.h
new file mode 100644
index 0000000..068b999
--- /dev/null
+++ b/include/stxxl/bits/containers/hash_map/iterator_map.h
@@ -0,0 +1,279 @@
+/***************************************************************************
+ *  include/stxxl/bits/containers/hash_map/iterator_map.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_CONTAINERS_HASH_MAP_ITERATOR_MAP_HEADER
+#define STXXL_CONTAINERS_HASH_MAP_ITERATOR_MAP_HEADER
+
+#include <map>
+
+#include <stxxl/bits/noncopyable.h>
+#include <stxxl/bits/compat/hash_map.h>
+#include <stxxl/bits/containers/hash_map/iterator.h>
+
+STXXL_BEGIN_NAMESPACE
+
+namespace hash_map {
+
+template <class HashMap>
+class iterator_map : private noncopyable // registry of the iterators registered with one hash map, stored by bucket index
+{
+public:
+    typedef HashMap hash_map_type;
+    typedef typename hash_map_type::node_type node_type;
+    typedef typename hash_map_type::source_type source_type;
+    typedef typename hash_map_type::key_type key_type;
+
+    typedef typename hash_map_type::internal_size_type internal_size_type;
+    typedef typename hash_map_type::external_size_type external_size_type;
+
+    typedef hash_map_iterator_base<hash_map_type> iterator_base;
+
+private:
+#if 0 // disabled alternative: ordered std::multimap instead of the hash multimap below
+    typedef std::multimap<internal_size_type, iterator_base*> multimap_type;
+#else
+    struct hasher
+    {
+        size_t operator () (const internal_size_type& key) const
+        {
+            return longhash1(key);
+        }
+#if STXXL_MSVC // only compiled for MSVC: additional less-than predicate and table parameters
+        bool operator () (const internal_size_type& a, const internal_size_type& b) const
+        {
+            return (a < b);
+        }
+        enum
+        {                                       // parameters for hash table
+            bucket_size = 4,                    // 0 < bucket_size
+            min_buckets = 8                     // min_buckets = 2 ^^ N, 0 < N
+        };
+#endif
+    };
+    // store iterators by bucket-index (hashed with the hasher above)
+    typedef typename compat_hash_multimap<
+            internal_size_type, iterator_base*, hasher
+            >::result multimap_type;
+#endif
+
+    //! entry of the multimap: bucket-index and pointer to iterator_base
+    typedef typename multimap_type::value_type pair_type;
+    typedef typename multimap_type::iterator mmiterator_type;
+    typedef typename multimap_type::const_iterator const_mmiterator_type;
+
+    hash_map_type* map_;    //!< hash map whose _eq() is used to compare keys in the fix_* methods
+    multimap_type it_map_;  //!< registered iterators (stored as pointers, not owned), keyed by bucket index
+
+public:
+    iterator_map(hash_map_type* map)
+        : map_(map)
+    { }
+
+    ~iterator_map()
+    {
+        it_map_.clear();
+    }
+
+    void register_iterator(iterator_base& it) // registers it under its current bucket it.i_bucket_
+    {
+        register_iterator(it, it.i_bucket_);
+    }
+
+    void register_iterator(iterator_base& it, internal_size_type i_bucket) // stores &it under i_bucket
+    {
+        STXXL_VERBOSE2("hash_map::iterator_map register_iterator addr=" << &it << " bucket=" << i_bucket);
+        it_map_.insert(pair_type(i_bucket, &it));
+    }
+
+    void unregister_iterator(iterator_base& it) // unregisters it from its current bucket it.i_bucket_
+    {
+        unregister_iterator(it, it.i_bucket_);
+    }
+
+    void unregister_iterator(iterator_base& it, internal_size_type i_bucket) // throws std::runtime_error if &it is not found under i_bucket
+    {
+        STXXL_VERBOSE2("hash_map::iterator_map unregister_iterator addr=" << &it << " bucket=" << i_bucket);
+
+        std::pair<mmiterator_type, mmiterator_type> range
+            = it_map_.equal_range(i_bucket);
+
+        assert(range.first != range.second);
+
+        for (mmiterator_type i = range.first; i != range.second; ++i)
+        {
+            if (i->second == &it)
+            {
+                it_map_.erase(i); // erase exactly one entry and stop; i is not used afterwards
+                return;
+            }
+        }
+
+        throw std::runtime_error("unregister_iterator Panic in hash_map::iterator_map, can not find and unregister iterator");
+    }
+
+    //! Update the iterators registered under i_bucket_old whose key equals key:
+    //! re-register them under i_bucket_new if it differs and make them point to
+    //! position i_ext in external memory (will be called during re-hashing)
+    void fix_iterators_2ext(internal_size_type i_bucket_old, const key_type& key,
+                            internal_size_type i_bucket_new, external_size_type i_ext)
+    {
+        STXXL_VERBOSE2("hash_map::iterator_map fix_iterators_2ext i_bucket=" << i_bucket_old << " new_i_ext=" << i_ext);
+
+        std::vector<iterator_base*> its2fix;
+        _find(i_bucket_old, its2fix);
+
+        for (typename std::vector<iterator_base*>::iterator
+             it2fix = its2fix.begin(); it2fix != its2fix.end(); ++it2fix)
+        {
+            if (!map_->_eq(key, (**it2fix).key_))
+                continue;
+
+            if (i_bucket_old != i_bucket_new)
+            {
+                unregister_iterator(**it2fix); // must run while i_bucket_ still holds the old bucket
+                register_iterator(**it2fix, i_bucket_new);
+            }
+
+            (**it2fix).i_bucket_ = i_bucket_new;
+            (**it2fix).node_ = NULL;
+            (**it2fix).i_external_ = i_ext;
+            (**it2fix).source_ = hash_map_type::src_external;
+            // external position is now known (i_ext) and therefore valid
+            (**it2fix).ext_valid_ = true;
+            (**it2fix).reset_reader();
+            (**it2fix).reader_ = NULL;
+        }
+    }
+
+    //! Update the iterators registered under i_bucket whose key equals key: make them point to the given node
+    //! in internal memory and advance their external position by one (will be called by insert_oblivious)
+    void fix_iterators_2int(internal_size_type i_bucket, const key_type& key, node_type* node)
+    {
+        STXXL_VERBOSE2("hash_map::iterator_map fix_iterators_2int i_bucket=" << i_bucket << " node=" << node);
+
+        std::vector<iterator_base*> its2fix;
+        _find(i_bucket, its2fix);
+
+        for (typename std::vector<iterator_base*>::iterator
+             it2fix = its2fix.begin(); it2fix != its2fix.end(); ++it2fix)
+        {
+            if (!map_->_eq((**it2fix).key_, key))
+                continue;
+
+            assert((**it2fix).source_ == hash_map_type::src_external);
+
+            (**it2fix).source_ = hash_map_type::src_internal;
+            (**it2fix).node_ = node;
+            (**it2fix).i_external_++;
+            if ((** it2fix).reader_) // keep an existing reader in step with i_external_
+                (** it2fix).reader_->operator ++ ();
+        }
+    }
+
+    //! Update the iterators registered under i_bucket whose key equals key: mark them as end-iterators,
+    //! reset their reader and unregister them (called by erase and erase_oblivious)
+    void fix_iterators_2end(internal_size_type i_bucket, const key_type& key)
+    {
+        STXXL_VERBOSE2("hash_map::iterator_map fix_iterators_2end i_bucket=" << i_bucket);
+
+        std::vector<iterator_base*> its2fix;
+        _find(i_bucket, its2fix);
+
+        for (typename std::vector<iterator_base*>::iterator
+             it2fix = its2fix.begin(); it2fix != its2fix.end(); ++it2fix)
+        {
+            if (!map_->_eq(key, (**it2fix).key_))
+                continue;
+
+            (**it2fix).end_ = true;
+            (**it2fix).reset_reader();
+            unregister_iterator(**it2fix);
+        }
+    }
+
+    //! Mark every registered iterator as end-iterator and reset its reader,
+    //! then drop all registrations (used by clear())
+    void fix_iterators_all2end()
+    {
+        for (mmiterator_type it2fix = it_map_.begin();
+             it2fix != it_map_.end(); ++it2fix)
+        {
+            (*it2fix).second->end_ = true;
+            (*it2fix).second->reset_reader();
+        }
+        it_map_.clear();
+    }
+
+private:
+    //! Append pointers to all iterators registered with the given bucket to outc (callers iterate over this copy while they re-/unregister)
+    template <class OutputContainer>
+    void _find(internal_size_type i_bucket, OutputContainer& outc)
+    {
+        std::pair<mmiterator_type, mmiterator_type> range
+            = it_map_.equal_range(i_bucket);
+
+        for (mmiterator_type i = range.first; i != range.second; ++i)
+            outc.push_back((*i).second);
+    }
+
+    // sets the map_ pointer of every registered iterator to the given hash map
+    void change_hash_map_pointers(hash_map_type* map)
+    {
+        for (mmiterator_type it = it_map_.begin(); it != it_map_.end(); ++it)
+            ((*it).second)->map_ = map;
+    }
+
+public:
+    void swap(iterator_map<HashMap>& obj) // exchanges registrations and map_, then writes each side's map_ into its registered iterators
+    {
+        std::swap(it_map_, obj.it_map_);
+        std::swap(map_, obj.map_);
+
+        change_hash_map_pointers(map_);
+        obj.change_hash_map_pointers(obj.map_);
+    }
+
+    void print_statistics(std::ostream& o = std::cout) const // prints the number of registered iterators and one line per iterator
+    {
+        o << "Registered iterators: " << it_map_.size() << "\n";
+
+        for (const_mmiterator_type i = it_map_.begin(); i != it_map_.end(); ++i)
+        {
+            o << "  Address=" << i->second
+              << ", Bucket=" << i->second->i_bucket_
+              << ", Node=" << i->second->node_
+              << ", i_ext=" << i->second->i_external_
+              << ", "
+              << ((i->second->source_ == hash_map_type::src_external)
+                ? "external" : "internal") << std::endl;
+        }
+    }
+};
+
+} // namespace hash_map
+
+STXXL_END_NAMESPACE
+
+namespace std {
+
+template <class HashMapType>
+void swap(stxxl::hash_map::iterator_map<HashMapType>& a,
+          stxxl::hash_map::iterator_map<HashMapType>& b)
+{
+    if (&a == &b) return;   // same object: nothing to exchange
+    a.swap(b);              // forward to the member swap of iterator_map
+}
+
+} // namespace std
+
+#endif // !STXXL_CONTAINERS_HASH_MAP_ITERATOR_MAP_HEADER
diff --git a/include/stxxl/bits/containers/hash_map/tuning.h b/include/stxxl/bits/containers/hash_map/tuning.h
new file mode 100644
index 0000000..525eb26
--- /dev/null
+++ b/include/stxxl/bits/containers/hash_map/tuning.h
@@ -0,0 +1,50 @@
+/***************************************************************************
+ *  include/stxxl/bits/containers/hash_map/tuning.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_CONTAINERS_HASH_MAP_TUNING_HEADER
+#define STXXL_CONTAINERS_HASH_MAP_TUNING_HEADER
+#define _STXXL_TUNING_H_
+
+#include <stxxl/mng>
+#include <stxxl/bits/singleton.h>
+
+STXXL_BEGIN_NAMESPACE
+
+namespace hash_map {
+
+//! Tuning parameters for external memory hash map, accessed through singleton<tuning>.
+class tuning : public singleton<tuning>
+{
+    friend class singleton<tuning>;
+
+public:
+    //! number of blocks per prefetch page, see buffered_reader (default: 2 * number of disks)
+    size_t prefetch_page_size;
+    //! number of pages to prefetch, see buffered_reader (default: 2)
+    size_t prefetch_pages;
+    //! see block_cache and hash_map (default: 12 * number of disks)
+    size_t blockcache_size;
+
+private:
+    /*! set default values for the tuning params, derived from config's disks_number(); private, friend singleton<tuning> may call it */
+    tuning()
+        : prefetch_page_size(config::get_instance()->disks_number() * 2),
+          prefetch_pages(2),
+          blockcache_size(config::get_instance()->disks_number() * 12)
+    { }
+};
+
+} // namespace hash_map
+
+STXXL_END_NAMESPACE
+
+#endif // !STXXL_CONTAINERS_HASH_MAP_TUNING_HEADER
diff --git a/include/stxxl/bits/containers/hash_map/util.h b/include/stxxl/bits/containers/hash_map/util.h
new file mode 100644
index 0000000..ff31eb6
--- /dev/null
+++ b/include/stxxl/bits/containers/hash_map/util.h
@@ -0,0 +1,577 @@
+/***************************************************************************
+ *  include/stxxl/bits/containers/hash_map/util.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_CONTAINERS_HASH_MAP_UTIL_HEADER
+#define STXXL_CONTAINERS_HASH_MAP_UTIL_HEADER
+#define STXXL_CONTAINERS_HASHMAP__UTIL_H
+
+#include <stxxl/bits/mng/block_manager.h>
+#include <stxxl/bits/mng/buf_writer.h>
+
+#include <stxxl/bits/containers/hash_map/tuning.h>
+#include <stxxl/bits/containers/hash_map/block_cache.h>
+
+STXXL_BEGIN_NAMESPACE
+
+namespace hash_map {
+
+// For internal memory chaining: the next-pointer and the delete-flag share the
+// same word next_and_del_; its lowest bit holds the del-flag of this node.
+template <class ValueType>
+struct node
+{
+    node<ValueType>* next_and_del_;
+    ValueType value_;
+
+    //! check whether this node is marked as deleted (lowest bit of next_and_del_).
+    bool deleted()
+    {
+        return ((int_type)next_and_del_ & 0x01) == 1;
+    }
+    //! set or clear the deleted flag of this node, keeping the next-pointer; returns d.
+    bool set_deleted(bool d)
+    {
+        next_and_del_ = (node<ValueType>*)(((int_type)next_and_del_ & ~0x01) | (int_type)d);
+        return d;
+    }
+
+    //! return the next node, i.e. next_and_del_ with the deleted flag masked out.
+    node<ValueType> * next()
+    {
+        return (node<ValueType>*)((int_type)next_and_del_ & ~0x01);
+    }
+    //! change the next-pointer, keeping the deleted flag of this node; returns n (n must have a clear lowest bit).
+    node<ValueType> * set_next(node<ValueType>* n)
+    {
+        next_and_del_ = (node<ValueType>*)(((int_type)next_and_del_ & 0x01) | (int_type)n);
+        return n;
+    }
+};
+
+template <class NodeType>
+struct bucket
+{
+    //! entry point to the chain in internal memory (NULL if there is none)
+    NodeType* list_;
+
+    //! number of elements in external memory
+    external_size_type n_external_;
+
+    //! index of first block's bid (to be used as index for hash_map's
+    //! bids_-array)
+    internal_size_type i_block_;
+
+    //! index of first subblock within that block
+    internal_size_type i_subblock_;
+
+    bucket() // empty bucket: no chain, no external elements, indices 0
+        : list_(NULL),
+          n_external_(0),
+          i_block_(0),
+          i_subblock_(0)
+    { }
+
+    bucket(NodeType* list, external_size_type n_external,
+           internal_size_type i_block, internal_size_type i_subblock)
+        : list_(list),
+          n_external_(n_external),
+          i_block_(i_block),
+          i_subblock_(i_subblock)
+    { }
+};
+
+//! Used to scan the values of a sequence of blocks in external memory through a shared block-cache, optionally with prefetching.
+template <class CacheType, class BidIterator>
+class buffered_reader : private noncopyable
+{
+public:
+    typedef CacheType cache_type;
+    typedef BidIterator bid_iterator;
+
+    typedef typename cache_type::block_type block_type;
+    typedef typename block_type::value_type subblock_type;
+    typedef typename subblock_type::value_type value_type;
+
+    typedef typename bid_iterator::value_type bid_type;
+
+    enum { block_size = block_type::size, subblock_size = subblock_type::size };
+
+private:
+    //! index of the current value within the current block (counted over all its subblocks)
+    unsigned_type i_value_;
+    //! points to the beginning of the block-sequence
+    bid_iterator begin_bid_;
+    //! points to the current block
+    bid_iterator curr_bid_;
+    //! points to the end of the block-sequence (one past the last block)
+    bid_iterator end_bid_;
+    //! points to the next block to prefetch (only assigned by enable_prefetching())
+    bid_iterator pref_bid_;
+
+    //! shared block-cache
+    cache_type& cache_;
+
+    //! true if prefetching enabled
+    bool prefetch_;
+    //! pages, which are read at once from disk, consist of this many blocks
+    unsigned_type page_size_;
+    //! number of pages to prefetch
+    unsigned_type prefetch_pages_;
+
+    //! true once make_dirty() has been called for the current block (see value())
+    bool dirty_;
+    //! current subblock (NULL until skip_to() has fetched one)
+    subblock_type* subblock_;
+
+public:
+    //! Create a new buffered reader to read the blocks in [seq_begin, seq_end)
+    //! \param seq_begin Iterator to the first block's bid
+    //! \param seq_end Iterator one past the last block's bid (not read itself)
+    //! \param cache Block-cache through which blocks are fetched, retained and prefetched
+    //! \param i_subblock Start reading from this subblock of the first block
+    //! \param prefetch Enable/Disable prefetching
+    buffered_reader(bid_iterator seq_begin, bid_iterator seq_end,
+                    cache_type& cache,
+                    internal_size_type i_subblock = 0, bool prefetch = true)
+        : i_value_(0),
+          begin_bid_(seq_begin),
+          curr_bid_(seq_begin),
+          end_bid_(seq_end),
+          cache_(cache),
+          prefetch_(false),
+          page_size_(tuning::get_instance()->prefetch_page_size),
+          prefetch_pages_(tuning::get_instance()->prefetch_pages),
+          dirty_(false),
+          subblock_(NULL)
+    {
+        if (seq_begin == seq_end) // empty sequence: nothing to prefetch or retain
+            return;
+
+        if (prefetch)
+            enable_prefetching();
+
+        // will (amongst other things) set subblock_ and retain current block
+        skip_to(seq_begin, i_subblock);
+    }
+
+    ~buffered_reader() // releases the current block unless the reader is at the end
+    {
+        if (curr_bid_ != end_bid_)
+            cache_.release_block(*curr_bid_);
+    }
+
+    void enable_prefetching() // no-op if prefetching is already enabled
+    {
+        if (prefetch_)
+            return;
+
+        prefetch_ = true;
+        pref_bid_ = curr_bid_;
+        // start prefetching page_size*prefetch_pages blocks beginning with current one
+        for (unsigned_type i = 0; i < page_size_ * prefetch_pages_; i++)
+        {
+            if (pref_bid_ == end_bid_)
+                break;
+
+            cache_.prefetch_block(*pref_bid_);
+            ++pref_bid_;
+        }
+    }
+
+    //! Get const-reference to current value (dereferences subblock_, so a current subblock must exist).
+    const value_type & const_value()
+    {
+        return (*subblock_)[i_value_ % subblock_size];
+    }
+
+    //! Get reference to current value. The current value's block's dirty flag
+    //! will be set (make_dirty() is called once per block, tracked by dirty_).
+    value_type & value()
+    {
+        if (!dirty_) {
+            cache_.make_dirty(*curr_bid_);
+            dirty_ = true;
+        }
+
+        return (*subblock_)[i_value_ % subblock_size];
+    }
+
+    //! Advance to the next value
+    //! \return false if the reader is at or has just reached the end of the block-sequence, otherwise true.
+    bool operator ++ ()
+    {
+        if (curr_bid_ == end_bid_)
+            return false;
+
+        // same block
+        if (i_value_ + 1 < block_size * subblock_size)
+        {
+            i_value_++;
+        }
+        // entered new block
+        else
+        {
+            cache_.release_block(*curr_bid_);
+
+            i_value_ = 0;
+            dirty_ = false;
+            ++curr_bid_;
+
+            if (curr_bid_ == end_bid_)
+                return false;
+
+            cache_.retain_block(*curr_bid_);
+
+            // if a complete page has been consumed, prefetch the next one
+            if (prefetch_ && (curr_bid_ - begin_bid_) % page_size_ == 0)
+            {
+                for (unsigned i = 0; i < page_size_; i++)
+                {
+                    if (pref_bid_ == end_bid_)
+                        break;
+                    cache_.prefetch_block(*pref_bid_);
+                    ++pref_bid_;
+                }
+            }
+        }
+
+        // entered new subblock
+        if (i_value_ % subblock_size == 0)
+        {
+            subblock_ = cache_.get_subblock(*curr_bid_, i_value_ / subblock_size);
+        }
+
+        return true;
+    }
+
+    //! Skip remaining values of the current subblock.
+    void next_subblock()
+    {
+        i_value_ = (i_value_ / subblock_size + 1) * subblock_size - 1; // last value of the current subblock
+        operator ++ ();         // takes care of prefetching etc
+    }
+
+    //! Continue reading at given block and subblock; the reader only steps forward (++curr_bid_) until it reaches bid.
+    void skip_to(bid_iterator bid, internal_size_type i_subblock)
+    {
+        if (curr_bid_ == end_bid_)
+            return;
+
+        if (bid != curr_bid_)
+            dirty_ = false;
+
+        cache_.release_block(*curr_bid_);
+
+        if (bid == end_bid_) // NOTE(review): returns with curr_bid_ unchanged although its block was just released - confirm release_block tolerates the later second release
+            return;
+
+        // skip to block
+        while (curr_bid_ != bid) {
+            ++curr_bid_;
+
+            if (prefetch_ && (curr_bid_ - begin_bid_) % page_size_ == 0)
+            {
+                for (unsigned i = 0; i < page_size_; i++)
+                {
+                    if (pref_bid_ == end_bid_)
+                        break;
+                    cache_.prefetch_block(*pref_bid_);
+                    ++pref_bid_;
+                }
+            }
+        }
+        // skip to subblock
+        i_value_ = i_subblock * subblock_size;
+        subblock_ = cache_.get_subblock(*curr_bid_, i_subblock);
+        cache_.retain_block(*curr_bid_);
+    }
+};
+
+//! Buffered writing of values. New Blocks are allocated as needed.
+template <class BlockType, class BidContainer>
+class buffered_writer : private noncopyable
+{
+public:
+    typedef BlockType block_type;
+    typedef BidContainer bid_container_type;
+
+    typedef typename block_type::value_type subblock_type;
+    typedef typename subblock_type::value_type value_type;
+
+    typedef stxxl::buffered_writer<block_type> writer_type;
+
+    enum {
+        block_size = block_type::size,
+        subblock_size = subblock_type::size
+    };
+
+private:
+    //! underlying stxxl::buffered_writer that performs the block writes
+    writer_type writer_;
+    //! current buffer-block
+    block_type* block_;
+
+    //! sequence of allocated blocks (to be expanded as needed)
+    bid_container_type* bids_;
+
+    //! current block's index
+    unsigned_type i_block_;
+    //! current value's index in the range of [0..\#values per block[
+    unsigned_type i_value_;
+    //! number of blocks to allocate in a row
+    unsigned_type increase_;
+
+public:
+    //! Create a new buffered writer.
+    //! \param c write values to these blocks (c holds the bids); grown by the writer when it runs out of bids
+    //! \param buffer_size Number of write-buffers to use
+    //! \param batch_size bulk buffered writing
+    buffered_writer(bid_container_type* c,
+                    int_type buffer_size, int_type batch_size)
+        : writer_(buffer_size, batch_size),
+          bids_(c),
+          i_block_(0),
+          i_value_(0),
+          increase_(config::get_instance()->disks_number() * 3)
+    {
+        block_ = writer_.get_free_block();
+    }
+
+    ~buffered_writer() // calls flush(), which writes the current block
+    {
+        flush();
+    }
+
+    //! Write all values from given stream (consumes the stream until it is empty).
+    template <class StreamType>
+    void append_from_stream(StreamType& stream)
+    {
+        while (!stream.empty())
+        {
+            append(*stream);
+            ++stream;
+        }
+    }
+
+    //! Write given value; when it fills the last slot of the block, the block is written out and a new one started.
+    void append(const value_type& value)
+    {
+        internal_size_type i_subblock = (i_value_ / subblock_size);
+        (*block_)[i_subblock][i_value_ % subblock_size] = value;
+
+        if (i_value_ + 1 < block_size * subblock_size)
+            i_value_++;
+        // reached end of a block
+        else
+        {
+            i_value_ = 0;
+
+            // allocate new blocks if necessary ...
+            if (i_block_ == bids_->size())
+            {
+                bids_->resize(bids_->size() + increase_);
+                block_manager* bm = stxxl::block_manager::get_instance();
+                bm->new_blocks(striping(), bids_->end() - increase_, bids_->end());
+            }
+            // ... and write current block
+            block_ = writer_.write(block_, (*bids_)[i_block_]);
+
+            i_block_++;
+        }
+    }
+
+    //! Continue writing at the beginning of the next subblock by appending a default-constructed
+    //! value to the last slot of the current subblock. TODO more efficient
+    void finish_subblock()
+    {
+        i_value_ = (i_value_ / subblock_size + 1) * subblock_size - 1;
+        append(value_type());           // writing and allocating blocks etc
+    }
+
+    //! Writes the current block (even if nothing was appended to it), advances i_block_ and flushes the underlying writer.
+    void flush()
+    {
+        i_value_ = 0;
+        if (i_block_ == bids_->size())
+        {
+            bids_->resize(bids_->size() + increase_);
+            block_manager* bm = stxxl::block_manager::get_instance();
+            bm->new_blocks(striping(), bids_->end() - increase_, bids_->end());
+        }
+        block_ = writer_.write(block_, (*bids_)[i_block_]);
+        i_block_++;
+
+        writer_.flush();
+    }
+
+    //! Index of current block.
+    internal_size_type i_block() { return i_block_; }
+
+    //! Index of current subblock.
+    internal_size_type i_subblock() { return i_value_ / subblock_size; }
+};
+
+/*!
+ * Additional information about a stored value:
+ * - the bucket in which it can be found
+ * - where it is currently stored (intern or extern)
+ * - the buffer-node and the position in external memory
+ * A default-constructed object has i_bucket_ == internal_size_type(-1); all other members are value-initialized.
+ */
+template <class HashMap>
+struct HashedValue
+{
+    typedef HashMap hash_map_type;
+    typedef typename hash_map_type::value_type value_type;
+    typedef typename hash_map_type::source_type source_type;
+    typedef typename hash_map_type::node_type node_type;
+
+    typedef typename hash_map_type::internal_size_type internal_size_type;
+    typedef typename hash_map_type::external_size_type external_size_type;
+
+    value_type value_;              //!< the stored value itself
+    internal_size_type i_bucket_;   //!< index of the bucket the value belongs to
+    source_type source_;            //!< where the value is currently stored
+    node_type* node_;               //!< buffer-node in internal memory (may be NULL)
+    external_size_type i_external_; //!< position in external memory
+
+    // Initialize every member: objects of this type are copied right after default construction (e.g. by HashedValuesStream).
+    HashedValue()
+        : value_(), i_bucket_(internal_size_type(-1)), source_(), node_(NULL), i_external_(0)
+    { }
+
+    HashedValue(const value_type& value, internal_size_type i_bucket,
+                source_type src, node_type* node, external_size_type i_external)
+        : value_(value),
+          i_bucket_(i_bucket),
+          source_(src),
+          node_(node),
+          i_external_(i_external)
+    { }
+};
+
+/*!
+ * Stream interface for all value-pairs currently stored in the map. Returned
+ * values are HashedValue-objects (actual value enriched with information on
+ * where the value can be found (bucket-number, internal, external)). Nodes
+ * marked as deleted in internal memory are not returned; if a key exists both
+ * in internal and in external memory, the external copy is skipped.
+*/
+template <class HashMap, class Reader>
+struct HashedValuesStream
+{
+    typedef HashMap hash_map_type;
+    typedef HashedValue<HashMap> value_type;
+
+    typedef typename hash_map_type::node_type node_type;
+    typedef typename hash_map_type::bid_container_type::iterator bid_iterator;
+    typedef typename hash_map_type::buckets_container_type::iterator bucket_iterator;
+
+    typedef typename hash_map_type::internal_size_type internal_size_type;
+    typedef typename hash_map_type::external_size_type external_size_type;
+
+    hash_map_type& map_;            //!< provides the key comparisons _leq() and _eq()
+    Reader& reader_;                //!< reader over the external values; advanced by this stream
+    bucket_iterator curr_bucket_;   //!< bucket currently being scanned
+    bucket_iterator end_bucket_;    //!< end of the bucket range
+    bid_iterator begin_bid_;        //!< base to which a bucket's i_block_ is added for skip_to()
+    internal_size_type i_bucket_;   //!< number of buckets passed since begin_bucket; reported in the returned values
+    node_type* node_;               //!< next internal node to consider in the current bucket
+    external_size_type i_external_; //!< number of external values of the current bucket already passed
+    value_type value_;              //!< current value of the stream, see operator *
+
+    HashedValuesStream(bucket_iterator begin_bucket, bucket_iterator end_bucket,
+                       Reader& reader, bid_iterator begin_bid,
+                       hash_map_type& map)
+        : map_(map),
+          reader_(reader),
+          curr_bucket_(begin_bucket),
+          end_bucket_(end_bucket),
+          begin_bid_(begin_bid),
+          i_bucket_(0),
+          node_(curr_bucket_ != end_bucket_ ? curr_bucket_->list_ : NULL),
+          i_external_(0)
+    {
+        if (!empty()) // NOTE(review): no skip_to() here - presumably reader is already positioned at the first bucket's data; confirm
+            value_ = find_next();
+    }
+
+    const value_type& operator * () { return value_; }
+
+    bool empty() const { return curr_bucket_ == end_bucket_; }
+
+    void operator ++ () // steps past the current value (internal node or external element) and fetches the next one
+    {
+        if (value_.source_ == hash_map_type::src_internal)
+            node_ = node_->next();
+        else
+        {
+            ++reader_;
+            ++i_external_;
+        }
+        value_ = find_next();
+    }
+
+    value_type find_next() // returns the next value without consuming it; a default-constructed value once all buckets are exhausted
+    {
+        while (true)
+        {
+            // internal and external elements available
+            while (node_ && i_external_ < curr_bucket_->n_external_)
+            {
+                if (map_._leq(node_->value_.first, reader_.const_value().first))
+                {
+                    if (map_._eq(node_->value_.first, reader_.const_value().first)) // same key: skip the external copy
+                    {
+                        ++reader_;
+                        ++i_external_;
+                    }
+
+                    if (!node_->deleted())
+                        return value_type(node_->value_, i_bucket_, hash_map_type::src_internal, node_, i_external_);
+                    else
+                        node_ = node_->next();
+                }
+                else
+                    return value_type(reader_.const_value(), i_bucket_, hash_map_type::src_external, node_, i_external_);
+            }
+            // only internal elements left
+            while (node_)
+            {
+                if (!node_->deleted())
+                    return value_type(node_->value_, i_bucket_, hash_map_type::src_internal, node_, i_external_);
+                else
+                    node_ = node_->next();
+            }
+            // only external elements left (the while acts as an if: its body returns immediately)
+            while (i_external_ < curr_bucket_->n_external_)
+                return value_type(reader_.const_value(), i_bucket_, hash_map_type::src_external, node_, i_external_);
+
+            // if we made it to this point there are obviously no more values in the current bucket
+            // let's try the next one (outer while-loop!)
+            ++curr_bucket_;
+            ++i_bucket_;
+            if (curr_bucket_ == end_bucket_)
+                return value_type();
+
+            node_ = curr_bucket_->list_;
+            i_external_ = 0;
+            reader_.skip_to(begin_bid_ + curr_bucket_->i_block_, curr_bucket_->i_subblock_);
+        }
+    }
+};
+
+} // namespace hash_map
+
+STXXL_END_NAMESPACE
+
+#endif // !STXXL_CONTAINERS_HASH_MAP_UTIL_HEADER
diff --git a/include/stxxl/bits/containers/map.h b/include/stxxl/bits/containers/map.h
index 64786fb..2831140 100644
--- a/include/stxxl/bits/containers/map.h
+++ b/include/stxxl/bits/containers/map.h
@@ -17,7 +17,6 @@
 #include <stxxl/bits/noncopyable.h>
 #include <stxxl/bits/containers/btree/btree.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace btree {
@@ -84,7 +83,7 @@ class map : private noncopyable
 {
     typedef btree::btree<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy> impl_type;
 
-    impl_type Impl;
+    impl_type impl;
 
 public:
     typedef typename impl_type::node_block_type node_block_type;
@@ -110,10 +109,10 @@ public:
     //! \name Iterators
     //! \{
 
-    iterator begin() { return Impl.begin(); }
-    iterator end() { return Impl.end(); }
-    const_iterator begin() const { return Impl.begin(); }
-    const_iterator end() const { return Impl.end(); }
+    iterator begin() { return impl.begin(); }
+    iterator end() { return impl.end(); }
+    const_iterator begin() const { return impl.begin(); }
+    const_iterator end() const { return impl.end(); }
     const_iterator cbegin() const { return begin(); }
     const_iterator cend() const { return end(); }
 
@@ -147,17 +146,17 @@ public:
     //! \name Capacity
     //! \{
 
-    size_type size() const { return Impl.size(); }
-    size_type max_size() const { return Impl.max_size(); }
-    bool empty() const { return Impl.empty(); }
+    size_type size() const { return impl.size(); }
+    size_type max_size() const { return impl.max_size(); }
+    bool empty() const { return impl.empty(); }
 
     //! \}
 
     //! \name Observers
     //! \{
 
-    key_compare key_comp() const { return Impl.key_comp(); }
-    value_compare value_comp() const { return Impl.value_comp(); }
+    key_compare key_comp() const { return impl.key_comp(); }
+    value_compare value_comp() const { return impl.value_comp(); }
 
     //! \}
 
@@ -169,7 +168,7 @@ public:
     //! \param leaf_cache_size_in_bytes size of leaf cache in bytes (btree implementation)
     map(unsigned_type node_cache_size_in_bytes,
         unsigned_type leaf_cache_size_in_bytes
-        ) : Impl(node_cache_size_in_bytes, leaf_cache_size_in_bytes)
+        ) : impl(node_cache_size_in_bytes, leaf_cache_size_in_bytes)
     { }
 
     //! A constructor
@@ -179,7 +178,7 @@ public:
     map(const key_compare& c_,
         unsigned_type node_cache_size_in_bytes,
         unsigned_type leaf_cache_size_in_bytes
-        ) : Impl(c_, node_cache_size_in_bytes, leaf_cache_size_in_bytes)
+        ) : impl(c_, node_cache_size_in_bytes, leaf_cache_size_in_bytes)
     { }
 
     //! Constructs a map from a given input range
@@ -199,7 +198,7 @@ public:
         bool range_sorted = false,
         double node_fill_factor = 0.75,
         double leaf_fill_factor = 0.6
-        ) : Impl(b, e, node_cache_size_in_bytes, leaf_cache_size_in_bytes,
+        ) : impl(b, e, node_cache_size_in_bytes, leaf_cache_size_in_bytes,
                  range_sorted, node_fill_factor, leaf_fill_factor)
     { }
 
@@ -222,7 +221,7 @@ public:
         bool range_sorted = false,
         double node_fill_factor = 0.75,
         double leaf_fill_factor = 0.6
-        ) : Impl(b, e, c_, node_cache_size_in_bytes, leaf_cache_size_in_bytes,
+        ) : impl(b, e, c_, node_cache_size_in_bytes, leaf_cache_size_in_bytes,
                  range_sorted, node_fill_factor, leaf_fill_factor)
     { }
 
@@ -231,35 +230,35 @@ public:
     //! \name Modifiers
     //! \{
 
-    void swap(map& obj) { std::swap(Impl, obj.Impl); }
+    void swap(map& obj) { std::swap(impl, obj.impl); }
     std::pair<iterator, bool> insert(const value_type& x)
     {
-        return Impl.insert(x);
+        return impl.insert(x);
     }
     iterator insert(iterator pos, const value_type& x)
     {
-        return Impl.insert(pos, x);
+        return impl.insert(pos, x);
     }
     template <class InputIterator>
     void insert(InputIterator b, InputIterator e)
     {
-        Impl.insert(b, e);
+        impl.insert(b, e);
     }
     void erase(iterator pos)
     {
-        Impl.erase(pos);
+        impl.erase(pos);
     }
     size_type erase(const key_type& k)
     {
-        return Impl.erase(k);
+        return impl.erase(k);
     }
     void erase(iterator first, iterator last)
     {
-        Impl.erase(first, last);
+        impl.erase(first, last);
     }
     void clear()
     {
-        Impl.clear();
+        impl.clear();
     }
 
     //! \}
@@ -269,39 +268,39 @@ public:
 
     iterator find(const key_type& k)
     {
-        return Impl.find(k);
+        return impl.find(k);
     }
     const_iterator find(const key_type& k) const
     {
-        return Impl.find(k);
+        return impl.find(k);
     }
     size_type count(const key_type& k)
     {
-        return Impl.count(k);
+        return impl.count(k);
     }
     iterator lower_bound(const key_type& k)
     {
-        return Impl.lower_bound(k);
+        return impl.lower_bound(k);
     }
     const_iterator lower_bound(const key_type& k) const
     {
-        return Impl.lower_bound(k);
+        return impl.lower_bound(k);
     }
     iterator upper_bound(const key_type& k)
     {
-        return Impl.upper_bound(k);
+        return impl.upper_bound(k);
     }
     const_iterator upper_bound(const key_type& k) const
     {
-        return Impl.upper_bound(k);
+        return impl.upper_bound(k);
     }
     std::pair<iterator, iterator> equal_range(const key_type& k)
     {
-        return Impl.equal_range(k);
+        return impl.equal_range(k);
     }
     std::pair<const_iterator, const_iterator> equal_range(const key_type& k) const
     {
-        return Impl.equal_range(k);
+        return impl.equal_range(k);
     }
 
     //! \}
@@ -311,7 +310,7 @@ public:
 
     data_type& operator [] (const key_type& k)
     {
-        return Impl[k];
+        return impl[k];
     }
 
     //! \}
@@ -322,31 +321,31 @@ public:
     //! Enables leaf prefetching during scanning
     void enable_prefetching()
     {
-        Impl.enable_prefetching();
+        impl.enable_prefetching();
     }
 
     //! Disables leaf prefetching during scanning
     void disable_prefetching()
     {
-        Impl.disable_prefetching();
+        impl.disable_prefetching();
     }
 
     //! Returns the status of leaf prefetching during scanning
     bool prefetching_enabled()
     {
-        return Impl.prefetching_enabled();
+        return impl.prefetching_enabled();
     }
 
     //! Prints cache statistics
     void print_statistics(std::ostream& o) const
     {
-        Impl.print_statistics(o);
+        impl.print_statistics(o);
     }
 
     //! Resets cache statistics
     void reset_statistics()
     {
-        Impl.reset_statistics();
+        impl.reset_statistics();
     }
 
     //! \}
@@ -418,7 +417,7 @@ template <class KeyType,
 inline bool operator == (const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& a,
                          const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& b)
 {
-    return a.Impl == b.Impl;
+    return a.impl == b.impl;
 }
 
 template <class KeyType,
@@ -431,7 +430,7 @@ template <class KeyType,
 inline bool operator < (const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& a,
                         const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& b)
 {
-    return a.Impl < b.Impl;
+    return a.impl < b.impl;
 }
 
 template <class KeyType,
@@ -444,7 +443,7 @@ template <class KeyType,
 inline bool operator > (const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& a,
                         const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& b)
 {
-    return a.Impl > b.Impl;
+    return a.impl > b.impl;
 }
 
 template <class KeyType,
@@ -457,7 +456,7 @@ template <class KeyType,
 inline bool operator != (const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& a,
                          const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& b)
 {
-    return a.Impl != b.Impl;
+    return a.impl != b.impl;
 }
 
 template <class KeyType,
@@ -470,7 +469,7 @@ template <class KeyType,
 inline bool operator <= (const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& a,
                          const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& b)
 {
-    return a.Impl <= b.Impl;
+    return a.impl <= b.impl;
 }
 
 template <class KeyType,
@@ -483,7 +482,7 @@ template <class KeyType,
 inline bool operator >= (const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& a,
                          const map<KeyType, DataType, CompareType, RawNodeSize, RawLeafSize, PDAllocStrategy>& b)
 {
-    return a.Impl >= b.Impl;
+    return a.impl >= b.impl;
 }
 
 //! \}
diff --git a/include/stxxl/bits/containers/matrix.h b/include/stxxl/bits/containers/matrix.h
index 41428ff..ae6ae1b 100644
--- a/include/stxxl/bits/containers/matrix.h
+++ b/include/stxxl/bits/containers/matrix.h
@@ -1076,18 +1076,28 @@ public:
     //! \param bs block scheduler used
     //! \param height height of the created matrix
     //! \param width width of the created matrix
-    matrix(block_scheduler_type& bs, const elem_size_type height, const elem_size_type width)
+    matrix(block_scheduler_type& bs,
+           const elem_size_type height, const elem_size_type width)
         : height(height),
           width(width),
-          data(new swappable_block_matrix_type
-                   (bs, div_ceil(height, BlockSideLength), div_ceil(width, BlockSideLength)))
+          data(
+              new swappable_block_matrix_type(
+                  bs,
+                  div_ceil(height, BlockSideLength),
+                  div_ceil(width, BlockSideLength))
+              )
     { }
 
-    matrix(block_scheduler_type& bs, const column_vector_type& left, const row_vector_type& right)
-        : height(left.size()),
-          width(right.size()),
-          data(new swappable_block_matrix_type
-                   (bs, div_ceil(height, BlockSideLength), div_ceil(width, BlockSideLength)))
+    matrix(block_scheduler_type& bs,
+           const column_vector_type& left, const row_vector_type& right)
+        : height((elem_size_type)left.size()),
+          width((elem_size_type)right.size()),
+          data(
+              new swappable_block_matrix_type(
+                  bs,
+                  div_ceil(height, BlockSideLength),
+                  div_ceil(width, BlockSideLength))
+              )
     { Ops::recursive_matrix_from_vectors(*data, left, right); }
 
     ~matrix() { }
diff --git a/include/stxxl/bits/containers/matrix_arithmetic.h b/include/stxxl/bits/containers/matrix_arithmetic.h
index 6e41b91..0ae6ffc 100644
--- a/include/stxxl/bits/containers/matrix_arithmetic.h
+++ b/include/stxxl/bits/containers/matrix_arithmetic.h
@@ -118,7 +118,7 @@ std::ostream& operator << (std::ostream& o, const matrix_operation_statistic_dat
 
 //! \}
 
-//! \internal \brief matrix low-level operations and tools
+//! matrix low-level operations and tools
 namespace matrix_local {
 
 //! A static_quadtree holds 4^Level elements arranged in a quad tree.
diff --git a/include/stxxl/bits/containers/pager.h b/include/stxxl/bits/containers/pager.h
index 95bef1a..4d6ccb4 100644
--- a/include/stxxl/bits/containers/pager.h
+++ b/include/stxxl/bits/containers/pager.h
@@ -21,7 +21,6 @@
 #include <stxxl/bits/common/rand.h>
 #include <stxxl/bits/common/simple_vector.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup stlcont_vector
diff --git a/include/stxxl/bits/containers/pq_ext_merger.h b/include/stxxl/bits/containers/pq_ext_merger.h
index b343724..8874173 100644
--- a/include/stxxl/bits/containers/pq_ext_merger.h
+++ b/include/stxxl/bits/containers/pq_ext_merger.h
@@ -46,8 +46,8 @@ private:
     origin_type m_origin;
 
 public:
-    short_sequence(Iterator first, Iterator last, origin_type origin) :
-        pair(first, last), m_origin(origin)
+    short_sequence(Iterator first, Iterator last, origin_type origin)
+        : pair(first, last), m_origin(origin)
     { }
 
     iterator begin()
@@ -120,23 +120,23 @@ public:
  * External merger, based on the loser tree data structure.
  * \param Arity_  maximum arity of merger, does not need to be a power of 2
  */
-template <class BlockType_,
-          class Cmp_,
-          unsigned Arity_,
-          class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY>
+template <class BlockType,
+          class Cmp,
+          unsigned Arity,
+          class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY>
 class ext_merger : private noncopyable
 {
 public:
     typedef stxxl::uint64 size_type;
-    typedef BlockType_ block_type;
+    typedef BlockType block_type;
     typedef typename block_type::bid_type bid_type;
     typedef typename block_type::value_type value_type;
-    typedef Cmp_ comparator_type;
-    typedef AllocStr_ alloc_strategy;
+    typedef Cmp comparator_type;
+    typedef AllocStr alloc_strategy;
     typedef read_write_pool<block_type> pool_type;
 
     // arity_bound / 2  <  arity  <=  arity_bound
-    enum { arity = Arity_, arity_bound = 1UL << (LOG2<Arity_>::ceil) };
+    enum { arity = Arity, arity_bound = 1UL << (LOG2<Arity>::ceil) };
 
 protected:
     comparator_type cmp;
@@ -253,7 +253,6 @@ protected:
         }
     };
 
-
 #if STXXL_PQ_EXTERNAL_LOSER_TREE
     struct Entry
     {
@@ -288,15 +287,15 @@ protected:
     block_type* sentinel_block;
 
 public:
-    ext_merger() :
-        size_(0), log_k(0), k(1), pool(0)
+    ext_merger()
+        : size_(0), log_k(0), k(1), pool(0)
     {
         init();
     }
 
-    ext_merger(pool_type* pool_) :
-        size_(0), log_k(0), k(1),
-        pool(pool_)
+    ext_merger(pool_type* pool_)
+        : size_(0), log_k(0), k(1),
+          pool(pool_)
     {
         init();
     }
@@ -364,7 +363,6 @@ private:
 #endif      //STXXL_PQ_EXTERNAL_LOSER_TREE
     }
 
-
 #if STXXL_PQ_EXTERNAL_LOSER_TREE
     // given any values in the leaves this
     // routing recomputes upper levels of the tree
@@ -487,7 +485,6 @@ private:
         rebuild_loser_tree();
     }
 
-
     // compact nonempty segments in the left half of the tree
     void compact_tree()
     {
@@ -535,7 +532,6 @@ private:
         rebuild_loser_tree();
     }
 
-
 #if 0
     void swap(ext_merger& obj)
     {
@@ -565,27 +561,29 @@ public:
     template <class OutputIterator>
     void multi_merge(OutputIterator begin, OutputIterator end)
     {
-        size_type length = end - begin;
+        int_type length = end - begin;
 
-        STXXL_VERBOSE1("ext_merger::multi_merge from " << k << " sequence(s), length = " << length);
+        STXXL_VERBOSE1("ext_merger::multi_merge from " << k << " sequence(s),"
+                       " length = " << length);
 
         if (length == 0)
             return;
 
         assert(k > 0);
-        assert(length <= size_);
+        assert(length <= (int_type)size_);
 
         //This is the place to make statistics about external multi_merge calls.
 
 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_EXTERNAL
         typedef stxxl::int64 diff_type;
+
         typedef std::pair<typename block_type::iterator, typename block_type::iterator> sequence;
 
         std::vector<sequence> seqs;
         std::vector<unsigned_type> orig_seq_index;
 
-        Cmp_ cmp;
-        priority_queue_local::invert_order<Cmp_, value_type, value_type> inv_cmp(cmp);
+        Cmp cmp;
+        priority_queue_local::invert_order<Cmp, value_type, value_type> inv_cmp(cmp);
 
         for (unsigned_type i = 0; i < k; ++i) //initialize sequences
         {
@@ -769,7 +767,7 @@ public:
             assert(free_segments.empty());
             //memcpy(target, states[0], length * sizeof(value_type));
             //std::copy(states[0],states[0]+length,target);
-            for (size_type i = 0; i < length; ++i, ++(states[0]), ++begin)
+            for (int_type i = 0; i < length; ++i, ++(states[0]), ++begin)
                 *begin = *(states[0]);
 
             entry[0].key = **states;
@@ -883,7 +881,6 @@ private:
             if (is_sentinel(winner_key)) //
                 deallocate_segment(winner_index);
 
-
             // go up the entry-tree
             for (unsigned_type i = (winner_index + kReg) >> 1; i > 0; i >>= 1)
             {
@@ -930,7 +927,6 @@ private:
             if (is_sentinel(winner_key))
                 deallocate_segment(winner_index);
 
-
             ++target;
 
             // update loser tree
@@ -987,10 +983,9 @@ public:
             unsigned_type free_slot = free_segments.top();
             free_segments.pop();
 
-
             // link new segment
             assert(segment_size);
-            unsigned_type nblocks = segment_size / block_type::size;
+            unsigned_type nblocks = (unsigned_type)(segment_size / block_type::size);
             //assert(nblocks); // at least one block
             STXXL_VERBOSE1("ext_merger::insert_segment nblocks=" << nblocks);
             if (nblocks == 0)
@@ -999,7 +994,7 @@ public:
                                nblocks << " blocks");
                 STXXL_VERBOSE1("THIS IS INEFFICIENT: TRY TO CHANGE PRIORITY QUEUE PARAMETERS");
             }
-            unsigned_type first_size = segment_size % block_type::size;
+            unsigned_type first_size = (unsigned_type)(segment_size % block_type::size);
             if (first_size == 0)
             {
                 first_size = block_type::size;
diff --git a/include/stxxl/bits/containers/pq_helpers.h b/include/stxxl/bits/containers/pq_helpers.h
index 2149be8..e2036da 100644
--- a/include/stxxl/bits/containers/pq_helpers.h
+++ b/include/stxxl/bits/containers/pq_helpers.h
@@ -68,7 +68,7 @@
 #define STXXL_PQ_INTERNAL_LOSER_TREE 1
 #endif
 
-#define STXXL_VERBOSE_PQ(msg) STXXL_VERBOSE2("[" << static_cast<void*>(this) << "] priority_queue::" << msg)
+#define STXXL_VERBOSE_PQ(msg) STXXL_VERBOSE2_THIS("priority_queue::" << msg)
 
 STXXL_BEGIN_NAMESPACE
 
@@ -87,24 +87,22 @@ namespace priority_queue_local {
  * - Provides access to underlying heap, so (parallel) sorting in place is possible.
  * - Can be cleared "at once", without reallocation.
  */
-template <typename _Tp, typename _Sequence = std::vector<_Tp>,
-          typename _Compare = std::less<typename _Sequence::value_type> >
+template <typename ValueType, typename ContainerType = std::vector<ValueType>,
+          typename CompareType = std::less<ValueType> >
 class internal_priority_queue
 {
-    // concept requirements
-    typedef typename _Sequence::value_type _Sequence_value_type;
-
 public:
-    typedef typename _Sequence::value_type value_type;
-    typedef typename _Sequence::reference reference;
-    typedef typename _Sequence::const_reference const_reference;
-    typedef typename _Sequence::size_type size_type;
-    typedef          _Sequence container_type;
+    typedef ValueType value_type;
+    typedef ContainerType container_type;
+    typedef CompareType compare_type;
+    typedef typename container_type::reference reference;
+    typedef typename container_type::const_reference const_reference;
+    typedef typename container_type::size_type size_type;
 
 protected:
     //  See queue::heap for notes on these names.
-    _Sequence heap;
-    _Compare comp;
+    container_type heap;
+    CompareType comp;
     size_type current_size;
 
 public:
@@ -136,16 +134,16 @@ public:
 
     /*!
      * Add data to the %queue.
-     * @param  __x  Data to be added.
+     * @param  x  Data to be added.
      *
      * This is a typical %queue operation.
      * The time complexity of the operation depends on the underlying
-     * sequence.
+     * container.
      */
     void
-    push(const value_type& __x)
+    push(const value_type& x)
     {
-        heap[current_size] = __x;
+        heap[current_size] = x;
         ++current_size;
         std::push_heap(heap.begin(), heap.begin() + current_size, comp);
     }
@@ -155,7 +153,7 @@ public:
      *
      * This is a typical %queue operation.  It shrinks the %queue
      * by one.  The time complexity of the operation depends on the
-     * underlying sequence.
+     * underlying container.
      *
      * Note that no data is returned, and if the first element's
      * data is needed, it should be retrieved before pop() is
@@ -185,7 +183,7 @@ public:
 };
 
 //! Inverts the order of a comparison functor by swapping its arguments.
-template <class Predicate, typename first_argument_type, typename second_argument_type>
+template <class Predicate, typename FirstType, typename SecondType>
 class invert_order
 {
 protected:
@@ -195,7 +193,7 @@ public:
     explicit
     invert_order(const Predicate& _pred) : pred(_pred) { }
 
-    bool operator () (const first_argument_type& x, const second_argument_type& y) const
+    bool operator () (const FirstType& x, const SecondType& y) const
     {
         return pred(y, x);
     }
@@ -206,50 +204,50 @@ public:
  * - Maximum size is fixed at compilation time, so an array can be used.
  * - Can be cleared "at once", without reallocation.
  */
-template <typename Tp_, unsigned_type max_size_>
+template <typename ValueType, unsigned_type MaxSize>
 class internal_bounded_stack
 {
-    typedef Tp_ value_type;
+    typedef ValueType value_type;
     typedef unsigned_type size_type;
-    enum { max_size = max_size_ };
+    enum { max_size = MaxSize };
 
-    size_type size_;
-    value_type array[max_size];
+    size_type m_size;
+    value_type m_array[max_size];
 
 public:
-    internal_bounded_stack() : size_(0) { }
+    internal_bounded_stack() : m_size(0) { }
 
     void push(const value_type& x)
     {
-        assert(size_ < max_size);
-        array[size_++] = x;
+        assert(m_size < max_size);
+        m_array[m_size++] = x;
     }
 
     const value_type & top() const
     {
-        assert(size_ > 0);
-        return array[size_ - 1];
+        assert(m_size > 0);
+        return m_array[m_size - 1];
     }
 
     void pop()
     {
-        assert(size_ > 0);
-        --size_;
+        assert(m_size > 0);
+        --m_size;
     }
 
     void clear()
     {
-        size_ = 0;
+        m_size = 0;
     }
 
     size_type size() const
     {
-        return size_;
+        return m_size;
     }
 
     bool empty() const
     {
-        return size_ == 0;
+        return m_size == 0;
     }
 };
 
diff --git a/include/stxxl/bits/containers/pq_losertree.h b/include/stxxl/bits/containers/pq_losertree.h
index 8520e98..9be46a6 100644
--- a/include/stxxl/bits/containers/pq_losertree.h
+++ b/include/stxxl/bits/containers/pq_losertree.h
@@ -32,15 +32,16 @@ namespace priority_queue_local {
 // The data structure from Knuth, "Sorting and Searching", Section 5.4.1
 /*!
  * Loser tree from Knuth, "Sorting and Searching", Section 5.4.1
- * \param  KNKMAX  maximum arity of loser tree, has to be a power of two
+ * \param  MaxArity  maximum arity of loser tree, has to be a power of two
  */
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
+template <class ValueType, class CompareType, unsigned MaxArity>
 class loser_tree : private noncopyable
 {
 public:
-    typedef ValTp_ value_type;
-    typedef Cmp_ comparator_type;
+    typedef ValueType value_type;
+    typedef CompareType comparator_type;
     typedef value_type Element;
+    enum { max_arity = MaxArity };
 
 private:
 #if STXXL_PQ_INTERNAL_LOSER_TREE
@@ -53,7 +54,7 @@ private:
 
     comparator_type cmp;
     // stack of free segment indices
-    internal_bounded_stack<unsigned_type, KNKMAX> free_slots;
+    internal_bounded_stack<unsigned_type, MaxArity> free_slots;
 
     unsigned_type size_;     // total number of elements stored
     unsigned_type logK;      // log of current tree size
@@ -64,16 +65,16 @@ private:
 #if STXXL_PQ_INTERNAL_LOSER_TREE
     // upper levels of loser trees
     // entry[0] contains the winner info
-    Entry entry[KNKMAX];
+    Entry entry[MaxArity];
 #endif  //STXXL_PQ_INTERNAL_LOSER_TREE
 
     // leaf information
     // note that Knuth uses indices k..k-1
     // while we use 0..k-1
-    Element* current[KNKMAX];               // pointer to current element
-    Element* current_end[KNKMAX];           // pointer to end of block for current element
-    Element* segment[KNKMAX];               // start of Segments
-    unsigned_type segment_size[KNKMAX];     // just to count the internal memory consumption, in bytes
+    Element* current[MaxArity];               // pointer to current element
+    Element* current_end[MaxArity];           // pointer to end of block for current element
+    Element* segment[MaxArity];               // start of Segments
+    unsigned_type segment_size[MaxArity];     // just to count the internal memory consumption, in bytes
 
     unsigned_type mem_cons_;
 
@@ -177,12 +178,12 @@ public:
         std::swap(k, obj.k);
         std::swap(sentinel, obj.sentinel);
 #if STXXL_PQ_INTERNAL_LOSER_TREE
-        swap_1D_arrays(entry, obj.entry, KNKMAX);
+        swap_1D_arrays(entry, obj.entry, MaxArity);
 #endif      //STXXL_PQ_INTERNAL_LOSER_TREE
-        swap_1D_arrays(current, obj.current, KNKMAX);
-        swap_1D_arrays(current_end, obj.current_end, KNKMAX);
-        swap_1D_arrays(segment, obj.segment, KNKMAX);
-        swap_1D_arrays(segment_size, obj.segment_size, KNKMAX);
+        swap_1D_arrays(current, obj.current, MaxArity);
+        swap_1D_arrays(current_end, obj.current_end, MaxArity);
+        swap_1D_arrays(segment, obj.segment, MaxArity);
+        swap_1D_arrays(segment_size, obj.segment_size, MaxArity);
         std::swap(mem_cons_, obj.mem_cons_);
     }
 
@@ -196,16 +197,19 @@ public:
 
     bool is_space_available() const     // for new segment
     {
-        return (k < KNKMAX) || !free_slots.empty();
+        return (k < MaxArity) || !free_slots.empty();
     }
 
-    void insert_segment(Element * target, unsigned_type length);     // insert segment beginning at target
+    //! insert segment beginning at target
+    void insert_segment(Element * target, unsigned_type length);
+
     unsigned_type size() const { return size_; }
 };
 
 ///////////////////////// LoserTree ///////////////////////////////////
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-loser_tree<ValTp_, Cmp_, KNKMAX>::loser_tree() : size_(0), logK(0), k(1), mem_cons_(0)
+template <class ValueType, class CompareType, unsigned MaxArity>
+loser_tree<ValueType, CompareType, MaxArity>::loser_tree()
+    : size_(0), logK(0), k(1), mem_cons_(0)
 {
     free_slots.push(0);
     segment[0] = NULL;
@@ -216,8 +220,8 @@ loser_tree<ValTp_, Cmp_, KNKMAX>::loser_tree() : size_(0), logK(0), k(1), mem_co
     init();
 }
 
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::init()
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::init()
 {
     assert(!cmp(cmp.min_value(), cmp.min_value()));     // verify strict weak ordering
     sentinel = cmp.min_value();
@@ -227,28 +231,27 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::init()
 #endif  //STXXL_PQ_INTERNAL_LOSER_TREE
 }
 
-
 // rebuild loser tree information from the values in current
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::rebuildLoserTree()
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::rebuildLoserTree()
 {
 #if STXXL_PQ_INTERNAL_LOSER_TREE
-    assert(LOG2<KNKMAX>::floor == LOG2<KNKMAX>::ceil);     // KNKMAX needs to be a power of two
+    // MaxArity needs to be a power of two
+    assert(LOG2<MaxArity>::floor == LOG2<MaxArity>::ceil);
     unsigned_type winner = initWinner(1);
     entry[0].index = winner;
     entry[0].key = *(current[winner]);
-#endif //STXXL_PQ_INTERNAL_LOSER_TREE
+#endif  //STXXL_PQ_INTERNAL_LOSER_TREE
 }
 
-
 #if STXXL_PQ_INTERNAL_LOSER_TREE
 // given any values in the leaves this
 // routing recomputes upper levels of the tree
 // from scratch in linear time
 // initialize entry[root].index and the subtree rooted there
 // return winner index
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-unsigned_type loser_tree<ValTp_, Cmp_, KNKMAX>::initWinner(unsigned_type root)
+template <class ValueType, class CompareType, unsigned MaxArity>
+unsigned_type loser_tree<ValueType, CompareType, MaxArity>::initWinner(unsigned_type root)
 {
     if (root >= k) {     // leaf reached
         return root - k;
@@ -269,14 +272,13 @@ unsigned_type loser_tree<ValTp_, Cmp_, KNKMAX>::initWinner(unsigned_type root)
     }
 }
 
-
 // first go up the tree all the way to the root
 // hand down old winner for the respective subtree
 // based on new value, and old winner and loser
 // update each node on the path to the root top down.
 // This is implemented recursively
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::update_on_insert(
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::update_on_insert(
     unsigned_type node,
     const Element& newKey,
     unsigned_type newIndex,
@@ -322,14 +324,13 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::update_on_insert(
 }
 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
 
-
 // make the tree two times as wide
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::doubleK()
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::doubleK()
 {
-    STXXL_VERBOSE3("loser_tree::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_slots.size());
+    STXXL_VERBOSE3("loser_tree::doubleK (before) k=" << k << " logK=" << logK << " MaxArity=" << MaxArity << " #free=" << free_slots.size());
     assert(k > 0);
-    assert(k < KNKMAX);
+    assert(k < MaxArity);
     assert(free_slots.empty());                          // stack was free (probably not needed)
 
     // make all new entries free
@@ -346,17 +347,16 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::doubleK()
     k *= 2;
     logK++;
 
-    STXXL_VERBOSE3("loser_tree::doubleK (after)  k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_slots.size());
+    STXXL_VERBOSE3("loser_tree::doubleK (after)  k=" << k << " logK=" << logK << " MaxArity=" << MaxArity << " #free=" << free_slots.size());
     assert(!free_slots.empty());
 
     // recompute loser tree information
     rebuildLoserTree();
 }
 
-
 // compact nonempty segments in the left half of the tree
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::compactTree()
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::compactTree()
 {
     STXXL_VERBOSE3("loser_tree::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_slots.size());
     assert(logK > 0);
@@ -409,14 +409,14 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::compactTree()
     rebuildLoserTree();
 }
 
-
 // insert segment beginning at target
 // require: is_space_available() == 1
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::insert_segment(Element* target, unsigned_type length)
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::
+insert_segment(Element* target, unsigned_type length)
 {
     STXXL_VERBOSE2("loser_tree::insert_segment(" << target << "," << length << ")");
-    //std::copy(target,target + length,std::ostream_iterator<ValTp_>(std::cout, "\n"));
+    //std::copy(target,target + length,std::ostream_iterator<ValueType>(std::cout, "\n"));
 
     if (length > 0)
     {
@@ -433,7 +433,6 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::insert_segment(Element* target, unsigned_
         unsigned_type index = free_slots.top();
         free_slots.pop();
 
-
         // link new segment
         current[index] = segment[index] = target;
         current_end[index] = target + length;
@@ -458,9 +457,8 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::insert_segment(Element* target, unsigned_
     }
 }
 
-
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-loser_tree<ValTp_, Cmp_, KNKMAX>::~loser_tree()
+template <class ValueType, class CompareType, unsigned MaxArity>
+loser_tree<ValueType, CompareType, MaxArity>::~loser_tree()
 {
     STXXL_VERBOSE1("loser_tree::~loser_tree()");
     for (unsigned_type i = 0; i < k; ++i)
@@ -477,8 +475,9 @@ loser_tree<ValTp_, Cmp_, KNKMAX>::~loser_tree()
 }
 
 // free an empty segment .
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::deallocate_segment(unsigned_type slot)
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::
+deallocate_segment(unsigned_type slot)
 {
     // reroute current pointer to some empty sentinel segment
     // with a sentinel key
@@ -496,14 +495,14 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::deallocate_segment(unsigned_type slot)
     free_slots.push(slot);
 }
 
-
 // delete the length smallest elements and write them to target
 // empty segments are deallocated
 // require:
 // - there are at least length elements
 // - segments are ended by sentinels
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element* target, unsigned_type length)
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::
+multi_merge(Element* target, unsigned_type length)
 {
     STXXL_VERBOSE3("loser_tree::multi_merge(target=" << target << ", len=" << length << ") k=" << k);
 
@@ -516,7 +515,7 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element* target, unsigned_typ
     //This is the place to make statistics about internal multi_merge calls.
 
 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL
-    priority_queue_local::invert_order<Cmp_, value_type, value_type> inv_cmp(cmp);
+    priority_queue_local::invert_order<CompareType, value_type, value_type> inv_cmp(cmp);
 #endif
     switch (logK) {
     case 0:
@@ -650,7 +649,6 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element* target, unsigned_typ
         break;
     }
 
-
     size_ -= length;
 
     // compact tree if it got considerably smaller
@@ -674,21 +672,21 @@ void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element* target, unsigned_typ
             compactTree();
         }
     }
-    //std::copy(target,target + length,std::ostream_iterator<ValTp_>(std::cout, "\n"));
+    //std::copy(target,target + length,std::ostream_iterator<ValueType>(std::cout, "\n"));
 }
 
-
 // is this segment empty and does not point to sentinel yet?
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-inline bool loser_tree<ValTp_, Cmp_, KNKMAX>::is_segment_empty(unsigned_type slot)
+template <class ValueType, class CompareType, unsigned MaxArity>
+inline bool loser_tree<ValueType, CompareType, MaxArity>::
+is_segment_empty(unsigned_type slot)
 {
     return (is_sentinel(*(current[slot])) && (current[slot] != &sentinel));
 }
 
 #if STXXL_PQ_INTERNAL_LOSER_TREE
 // multi-merge for arbitrary K
-template <class ValTp_, class Cmp_, unsigned KNKMAX>
-void loser_tree<ValTp_, Cmp_, KNKMAX>::
+template <class ValueType, class CompareType, unsigned MaxArity>
+void loser_tree<ValueType, CompareType, MaxArity>::
 multi_merge_k(Element* target, unsigned_type length)
 {
     Entry* currentPos;
@@ -716,7 +714,6 @@ multi_merge_k(Element* target, unsigned_type length)
         if (is_sentinel(winnerKey))     //
             deallocate_segment(winnerIndex);
 
-
         // go up the entry-tree
         for (unsigned_type i = (winnerIndex + kReg) >> 1; i > 0; i >>= 1) {
             currentPos = entry + i;
diff --git a/include/stxxl/bits/containers/pq_mergers.h b/include/stxxl/bits/containers/pq_mergers.h
index 25d89c1..009c572 100644
--- a/include/stxxl/bits/containers/pq_mergers.h
+++ b/include/stxxl/bits/containers/pq_mergers.h
@@ -37,12 +37,13 @@ namespace priority_queue_local {
 // require: at least length nonsentinel elements available in source0, source1
 // require: target may overwrite one of the sources as long as
 //   *(sourcex + length) is before the end of sourcex
-template <class InputIterator, class OutputIterator, class CompareType>
+template <class InputIterator, class OutputIterator,
+          class CompareType, typename SizeType>
 void merge_iterator(
     InputIterator& source0,
     InputIterator& source1,
     OutputIterator target,
-    unsigned_type length,
+    SizeType length,
     CompareType& cmp)
 {
     OutputIterator done = target + length;
@@ -69,13 +70,14 @@ void merge_iterator(
 // require: at least length nonsentinel elements available in source0, source1 and source2
 // require: target may overwrite one of the sources as long as
 //   *(sourcex + length) is before the end of sourcex
-template <class InputIterator, class OutputIterator, class CompareType>
+template <class InputIterator, class OutputIterator,
+          class CompareType, typename SizeType>
 void merge3_iterator(
     InputIterator& source0,
     InputIterator& source1,
     InputIterator& source2,
     OutputIterator target,
-    unsigned_type length,
+    SizeType length,
     CompareType& cmp)
 {
     OutputIterator done = target + length;
@@ -130,25 +132,27 @@ void merge3_iterator(
 #undef Merge3Case
 }
 
-
 // merge length elements from the four sentinel terminated input
 // sequences source0, source1, source2 and source3 to target
 // advance source0, source1, source2 and source3 accordingly
 // require: at least length nonsentinel elements available in source0, source1, source2 and source3
 // require: target may overwrite one of the sources as long as
 //   *(sourcex + length) is before the end of sourcex
-template <class InputIterator, class OutputIterator, class CompareType>
+template <class InputIterator, class OutputIterator,
+          class CompareType, typename SizeType>
 void merge4_iterator(
     InputIterator& source0,
     InputIterator& source1,
     InputIterator& source2,
     InputIterator& source3,
-    OutputIterator target, unsigned_type length, CompareType& cmp)
+    OutputIterator target, SizeType length, CompareType& cmp)
 {
     OutputIterator done = target + length;
 
-#define StartMerge4(a, b, c, d)                                                                                         \
-    if ((!cmp(*source ## a, *source ## b)) && (!cmp(*source ## b, *source ## c)) && (!cmp(*source ## c, *source ## d))) \
+#define StartMerge4(a, b, c, d)               \
+    if ((!cmp(*source ## a, *source ## b)) && \
+        (!cmp(*source ## b, *source ## c)) && \
+        (!cmp(*source ## c, *source ## d)))   \
         goto s ## a ## b ## c ## d;
 
     // b>a c>b d>c
diff --git a/include/stxxl/bits/containers/priority_queue.h b/include/stxxl/bits/containers/priority_queue.h
index 8050d5c..ac3ac2b 100644
--- a/include/stxxl/bits/containers/priority_queue.h
+++ b/include/stxxl/bits/containers/priority_queue.h
@@ -10,6 +10,7 @@
  *  Copyright (C) 2003, 2004, 2007 Roman Dementiev <dementiev at mpi-sb.mpg.de>
  *  Copyright (C) 2007-2009 Johannes Singler <singler at ira.uka.de>
  *  Copyright (C) 2007-2010 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -136,7 +137,6 @@ protected:
             ExtKMAX,
             alloc_strategy_type> ext_merger_type;
 
-
     int_merger_type int_mergers[num_int_groups];
     pool_type* pool;
     bool pool_owned;
@@ -166,7 +166,7 @@ private:
     void init();
 
     void refill_delete_buffer();
-    unsigned_type refill_group_buffer(unsigned_type k);
+    size_type refill_group_buffer(unsigned_type k);
 
     unsigned_type make_space_available(unsigned_type level);
     void empty_insert_heap();
@@ -308,7 +308,6 @@ public:
         for (int i = 0; i < num_ext_groups; ++i)
             dynam_alloc_mem += ext_mergers[i]->mem_cons();
 
-
         return (sizeof(*this) +
                 sizeof(ext_merger_type) * num_ext_groups +
                 dynam_alloc_mem);
@@ -320,18 +319,18 @@ public:
     //! \}
 };
 
-
 template <class ConfigType>
-inline typename priority_queue<ConfigType>::size_type priority_queue<ConfigType>::size() const
+inline typename priority_queue<ConfigType>::size_type
+priority_queue<ConfigType>::size() const
 {
     return size_ +
            insert_heap.size() - 1 +
            (delete_buffer_end - delete_buffer_current_min);
 }
 
-
 template <class ConfigType>
-inline const typename priority_queue<ConfigType>::value_type & priority_queue<ConfigType>::top() const
+inline const typename priority_queue<ConfigType>::value_type &
+priority_queue<ConfigType>::top() const
 {
     assert(!insert_heap.empty());
 
@@ -367,22 +366,20 @@ inline void priority_queue<ConfigType>::push(const value_type& obj)
     if (insert_heap.size() == N + 1)
         empty_insert_heap();
 
-
     assert(!insert_heap.empty());
 
     insert_heap.push(obj);
 }
 
-
 ////////////////////////////////////////////////////////////////
 
 template <class ConfigType>
-priority_queue<ConfigType>::priority_queue(pool_type& pool_) :
-    pool(&pool_),
-    pool_owned(false),
-    delete_buffer_end(delete_buffer + delete_buffer_size),
-    insert_heap(N + 2),
-    num_active_groups(0), size_(0)
+priority_queue<ConfigType>::priority_queue(pool_type& pool_)
+    : pool(&pool_),
+      pool_owned(false),
+      delete_buffer_end(delete_buffer + delete_buffer_size),
+      insert_heap(N + 2),
+      num_active_groups(0), size_(0)
 {
     STXXL_VERBOSE_PQ("priority_queue(pool)");
     init();
@@ -390,24 +387,24 @@ priority_queue<ConfigType>::priority_queue(pool_type& pool_) :
 
 // DEPRECATED
 template <class ConfigType>
-priority_queue<ConfigType>::priority_queue(prefetch_pool<block_type>& p_pool_, write_pool<block_type>& w_pool_) :
-    pool(new pool_type(p_pool_, w_pool_)),
-    pool_owned(true),
-    delete_buffer_end(delete_buffer + delete_buffer_size),
-    insert_heap(N + 2),
-    num_active_groups(0), size_(0)
+priority_queue<ConfigType>::priority_queue(prefetch_pool<block_type>& p_pool_, write_pool<block_type>& w_pool_)
+    : pool(new pool_type(p_pool_, w_pool_)),
+      pool_owned(true),
+      delete_buffer_end(delete_buffer + delete_buffer_size),
+      insert_heap(N + 2),
+      num_active_groups(0), size_(0)
 {
     STXXL_VERBOSE_PQ("priority_queue(p_pool, w_pool)");
     init();
 }
 
 template <class ConfigType>
-priority_queue<ConfigType>::priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem) :
-    pool(new pool_type(p_pool_mem / BlockSize, w_pool_mem / BlockSize)),
-    pool_owned(true),
-    delete_buffer_end(delete_buffer + delete_buffer_size),
-    insert_heap(N + 2),
-    num_active_groups(0), size_(0)
+priority_queue<ConfigType>::priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem)
+    : pool(new pool_type(p_pool_mem / BlockSize, w_pool_mem / BlockSize)),
+      pool_owned(true),
+      delete_buffer_end(delete_buffer + delete_buffer_size),
+      insert_heap(N + 2),
+      num_active_groups(0), size_(0)
 {
     STXXL_VERBOSE_PQ("priority_queue(pool sizes)");
     init();
@@ -451,12 +448,13 @@ priority_queue<ConfigType>::~priority_queue()
 
 // refill group_buffers[j] and return number of elements found
 template <class ConfigType>
-unsigned_type priority_queue<ConfigType>::refill_group_buffer(unsigned_type group)
+typename priority_queue<ConfigType>::size_type
+priority_queue<ConfigType>::refill_group_buffer(unsigned_type group)
 {
     STXXL_VERBOSE_PQ("refill_group_buffer(" << group << ")");
 
     value_type* target;
-    unsigned_type length;
+    size_type length;
     size_type group_size = (group < num_int_groups) ?
                            int_mergers[group].size() :
                            ext_mergers[group - num_int_groups]->size();                        // elements left in segments
@@ -480,7 +478,8 @@ unsigned_type priority_queue<ConfigType>::refill_group_buffer(unsigned_type grou
 
         // fill remaining space from group
         if (group < num_int_groups)
-            int_mergers[group].multi_merge(target + left_elements, length);
+            int_mergers[group].multi_merge(target + left_elements,
+                                           (unsigned_type)length);
         else
             ext_mergers[group - num_int_groups]->multi_merge(
                 target + left_elements,
@@ -515,11 +514,12 @@ void priority_queue<ConfigType>::refill_delete_buffer()
 
     size_type total_group_size = 0;
     //num_active_groups is <= 4
-    for (int i = (int)num_active_groups - 1; i >= 0; i--)
+    for (unsigned_type i = num_active_groups; i > 0; )
     {
+        --i;
         if ((group_buffers[i] + N) - group_buffer_current_mins[i] < delete_buffer_size)
         {
-            unsigned_type length = refill_group_buffer(i);
+            size_type length = refill_group_buffer(i);
             // max active level dry now?
             if (length == 0 && unsigned(i) == num_active_groups - 1)
                 --num_active_groups;
@@ -530,7 +530,7 @@ void priority_queue<ConfigType>::refill_delete_buffer()
             total_group_size += delete_buffer_size;  // actually only a sufficient lower bound
     }
 
-    unsigned_type length;
+    size_type length;
     if (total_group_size >= delete_buffer_size)      // buffer can be filled completely
     {
         length = delete_buffer_size;                 // amount to be copied
@@ -539,7 +539,7 @@ void priority_queue<ConfigType>::refill_delete_buffer()
     else
     {
         length = total_group_size;
-        assert(size_ == size_type(length)); // trees and group_buffers get empty
+        assert(size_ == length); // trees and group_buffers get empty
         size_ = 0;
     }
 
@@ -725,7 +725,6 @@ unsigned_type priority_queue<ConfigType>::make_space_available(unsigned_type lev
     return finalLevel;
 }
 
-
 // empty the insert heap into the main data structure
 template <class ConfigType>
 void priority_queue<ConfigType>::empty_insert_heap()
@@ -777,7 +776,7 @@ void priority_queue<ConfigType>::empty_insert_heap()
     // merge the rest to the new segment
     // note that merge exactly trips into the footsteps
     // of itself
-    priority_queue_local::merge_iterator(pos, newPos, newSegment, N, cmp);
+    priority_queue_local::merge_iterator(pos, newPos, newSegment, (unsigned_type)N, cmp);
 
     // and insert it
     unsigned_type freeLevel = make_space_available(0);
@@ -820,7 +819,7 @@ void priority_queue<ConfigType>::dump_sizes() const
         STXXL_MSG("  grp " << i << " int" <<
                   " grpbuf=" << current_group_buffer_size(i) <<
                   " size=" << int_mergers[i].size() << "/" << capacity <<
-                  " (" << (int)(int_mergers[i].size() * 100.0 / capacity) << "%)" <<
+                  " (" << (int)((double)int_mergers[i].size() * 100.0 / (double)capacity) << "%)" <<
                   " space=" << int_mergers[i].is_space_available());
     }
     for (int i = 0; i < num_ext_groups; ++i) {
@@ -828,7 +827,7 @@ void priority_queue<ConfigType>::dump_sizes() const
         STXXL_MSG("  grp " << i + num_int_groups << " ext" <<
                   " grpbuf=" << current_group_buffer_size(i + num_int_groups) <<
                   " size=" << ext_mergers[i]->size() << "/" << capacity <<
-                  " (" << (int)(ext_mergers[i]->size() * 100.0 / capacity) << "%)" <<
+                  " (" << (int)((double)ext_mergers[i]->size() * 100.0 / (double)capacity) << "%)" <<
                   " space=" << ext_mergers[i]->is_space_available());
     }
     dump_params();
@@ -854,51 +853,80 @@ struct dummy
     typedef dummy result;
 };
 
-template <unsigned_type E_, internal_size_type IntMem_, external_size_type MaxItems, unsigned_type B_, unsigned_type m_, bool stop = false>
+template <internal_size_type ElementSize, internal_size_type IntMem,
+          external_size_type MaxItems, internal_size_type BlockSize,
+          unsigned_type m_, bool stop = false>
 struct find_B_m
 {
-    typedef find_B_m<E_, IntMem_, MaxItems, B_, m_, stop> Self;
-
-    static const unsigned_type k = IntMem_ / B_;       // number of blocks that fit into M
-    static const unsigned_type element_size = E_;      // element size
-    static const internal_size_type IntMem = IntMem_;
-    static const unsigned_type B = B_;                 // block size
-    static const external_size_type m = m_;            // number of blocks fitting into buffers
-    static const unsigned_type c = k - m_;
+    typedef find_B_m<ElementSize, IntMem,
+                     MaxItems, BlockSize, m_, stop> self_type;
+
+    //! element size
+    static const internal_size_type element_size = ElementSize;
+    //! internal memory size of PQ
+    static const internal_size_type intmem = IntMem;
+    //! block size (iterates from 8 MiB downwards)
+    static const internal_size_type B = BlockSize;
+
+    //! number of blocks that fit into internal memory (M)
+    static const internal_size_type k = IntMem / BlockSize;
+    //! number of blocks fitting into buffers of mergers (arity of both
+    //! mergers), increased from 1 to 2048 ?-tb
+    static const internal_size_type m = m_;
+    //! remaining blocks, (freely moving, not necessarily unused) ?-tb
+    static const int_type c = k - m_;
+
     // memory occupied by block must be at least 10 times larger than size of ext sequence
-    // && satisfy memory req && if we have two ext mergers their degree must be at least 64=m/2
-    static const external_size_type fits = (c > 10) &&
-                                           (((k - m) * (m) * (m * B / (element_size * 4 * 1024))) >= MaxItems) &&
-                                           ((MaxItems < ((k - m) * m / (2 * element_size)) * 1024) || m >= 128);
+
+    //! calculated boolean whether the configuration fits into internal memory.
+    static const external_size_type fits =
+        // need some temporary constant-size internal blocks
+        (c > 10) &&
+        // satisfy items requirement
+        (((k - m) * m * (m * B / (ElementSize * 4 * 1024))) >= MaxItems) &&
+        // if we have two ext mergers their degree must be at least 64=m/2
+        ((MaxItems < ((k - m) * m / (2 * ElementSize)) * 1024) || m >= 128);
+
     static const unsigned_type step = 1;
 
-    typedef typename find_B_m<element_size, IntMem, MaxItems, B, m + step, fits || (m >= k - step)>::result candidate1;
-    typedef typename find_B_m<element_size, IntMem, MaxItems, B / 2, 1, fits || candidate1::fits>::result candidate2;
-    typedef typename IF<fits, Self, typename IF<candidate1::fits, candidate1, candidate2>::result>::result result;
+    //! if not fits, recurse into configuration with +step more internal buffers
+    typedef typename find_B_m<ElementSize, IntMem, MaxItems, B,
+                              m + step, fits || (m + step >= k)>::result candidate1;
+    //! if not fits, recurse into configuration with block size halved.
+    typedef typename find_B_m<ElementSize, IntMem, MaxItems, B / 2,
+                              1, fits || candidate1::fits>::result candidate2;
+
+    //! return a fitting configuration.
+    typedef typename IF<fits, self_type, typename IF<candidate1::fits, candidate1, candidate2>::result>::result result;
 };
 
 // specialization for the case when no valid parameters are found
-template <unsigned_type E_, unsigned_type IntMem, unsigned_type MaxItems, bool stop>
-struct find_B_m<E_, IntMem, MaxItems, 2048, 1, stop>
+template <internal_size_type ElementSize, internal_size_type IntMem,
+          external_size_type MaxItems, bool stop>
+struct find_B_m<ElementSize, IntMem, MaxItems, 2048, 1, stop>
 {
     enum { fits = false };
     typedef Parameters_for_priority_queue_not_found_Increase_IntMem result;
 };
 
 // to speedup search
-template <unsigned_type E_, unsigned_type IntMem, unsigned_type MaxItems, unsigned_type B_, unsigned_type m_>
-struct find_B_m<E_, IntMem, MaxItems, B_, m_, true>
+template <internal_size_type ElementSize, internal_size_type IntMem,
+          external_size_type MaxItems, unsigned_type BlockSize,
+          unsigned_type m_>
+struct find_B_m<ElementSize, IntMem, MaxItems, BlockSize, m_, true>
 {
     enum { fits = false };
     typedef dummy result;
 };
 
-// E_ size of element in bytes
-template <unsigned_type E_, unsigned_type IntMem, unsigned_type MaxItems>
+// start search
+template <internal_size_type ElementSize, internal_size_type IntMem,
+          external_size_type MaxItems>
 struct find_settings
 {
     // start from block size (8*1024*1024) bytes
-    typedef typename find_B_m<E_, IntMem, MaxItems, (8* 1024* 1024), 1>::result result;
+    typedef typename find_B_m<ElementSize, IntMem,
+                              MaxItems, (8* 1024* 1024), 1>::result result;
 };
 
 struct Parameters_not_found_Try_to_change_the_Tune_parameter
@@ -906,7 +934,6 @@ struct Parameters_not_found_Try_to_change_the_Tune_parameter
     typedef Parameters_not_found_Try_to_change_the_Tune_parameter result;
 };
 
-
 template <unsigned_type AI_, unsigned_type X_, unsigned_type CriticalSize>
 struct compute_N
 {
@@ -932,7 +959,7 @@ struct compute_N<1, X_, CriticalSize_>
 //! \addtogroup stlcont
 //! \{
 
-//! \brief Priority queue type generator. \n
+//! Priority queue type generator. \n
 //! <b> Introduction </b> to priority queue container: see \ref tutorial_pqueue tutorial. \n
 //! <b> Design and Internals </b> of priority queue container: see \ref design_pqueue.
 //!
@@ -997,7 +1024,6 @@ public:
 
 STXXL_END_NAMESPACE
 
-
 namespace std {
 
 template <class ConfigType>
diff --git a/include/stxxl/bits/containers/queue.h b/include/stxxl/bits/containers/queue.h
index c20bede..c164c6b 100644
--- a/include/stxxl/bits/containers/queue.h
+++ b/include/stxxl/bits/containers/queue.h
@@ -27,7 +27,6 @@
 #include <stxxl/bits/mng/write_pool.h>
 #include <stxxl/bits/mng/prefetch_pool.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 #ifndef STXXL_VERBOSE_QUEUE
@@ -87,11 +86,11 @@ public:
     //! \param D  number of parallel disks, defaulting to the configured number of scratch disks,
     //!           memory consumption will be 2 * D + 2 blocks
     //!           (first and last block, D blocks as write cache, D block for prefetching)
-    explicit queue(int_type D = -1) :
-        m_size(0),
-        delete_pool(true),
-        alloc_count(0),
-        bm(block_manager::get_instance())
+    explicit queue(int_type D = -1)
+        : m_size(0),
+          delete_pool(true),
+          alloc_count(0),
+          bm(block_manager::get_instance())
     {
         if (D < 1)
             D = config::get_instance()->disks_number();
@@ -106,11 +105,11 @@ public:
     //! \param p_pool_size  number of blocks in the prefetch pool, recommended at least 1
     //! \param blocks2prefetch_  defines the number of blocks to prefetch (\c front side),
     //!                          default is number of block in the prefetch pool
-    explicit queue(unsigned_type w_pool_size, unsigned_type p_pool_size, int blocks2prefetch_ = -1) :
-        m_size(0),
-        delete_pool(true),
-        alloc_count(0),
-        bm(block_manager::get_instance())
+    explicit queue(unsigned_type w_pool_size, unsigned_type p_pool_size, int blocks2prefetch_ = -1)
+        : m_size(0),
+          delete_pool(true),
+          alloc_count(0),
+          bm(block_manager::get_instance())
     {
         STXXL_VERBOSE_QUEUE("queue[" << this << "]::queue(sizes)");
         pool = new pool_type(p_pool_size, w_pool_size);
@@ -126,11 +125,11 @@ public:
     //!  \warning Number of blocks in the write pool must be at least 2, recommended at least 3
     //!  \warning Number of blocks in the prefetch pool recommended at least 1
     STXXL_DEPRECATED(
-        queue(write_pool<block_type>& w_pool, prefetch_pool<block_type>& p_pool, int blocks2prefetch_ = -1)) :
-        m_size(0),
-        delete_pool(true),
-        alloc_count(0),
-        bm(block_manager::get_instance())
+        queue(write_pool<block_type>& w_pool, prefetch_pool<block_type>& p_pool, int blocks2prefetch_ = -1))
+        : m_size(0),
+          delete_pool(true),
+          alloc_count(0),
+          bm(block_manager::get_instance())
     {
         STXXL_VERBOSE_QUEUE("queue[" << this << "]::queue(pools)");
         pool = new pool_type(p_pool, w_pool);
@@ -144,12 +143,12 @@ public:
     //!                          default is number of blocks in the prefetch pool
     //!  \warning Number of blocks in the write pool must be at least 2, recommended at least 3
     //!  \warning Number of blocks in the prefetch pool recommended at least 1
-    queue(pool_type& pool_, int blocks2prefetch_ = -1) :
-        m_size(0),
-        delete_pool(false),
-        pool(&pool_),
-        alloc_count(0),
-        bm(block_manager::get_instance())
+    queue(pool_type& pool_, int blocks2prefetch_ = -1)
+        : m_size(0),
+          delete_pool(false),
+          pool(&pool_),
+          alloc_count(0),
+          bm(block_manager::get_instance())
     {
         STXXL_VERBOSE_QUEUE("queue[" << this << "]::queue(pool)");
         init(blocks2prefetch_);
diff --git a/include/stxxl/bits/containers/sequence.h b/include/stxxl/bits/containers/sequence.h
index 43e175c..2d1717f 100644
--- a/include/stxxl/bits/containers/sequence.h
+++ b/include/stxxl/bits/containers/sequence.h
@@ -25,7 +25,6 @@
 #include <stxxl/bits/mng/write_pool.h>
 #include <stxxl/bits/mng/prefetch_pool.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 #ifndef STXXL_VERBOSE_SEQUENCE
@@ -35,7 +34,7 @@ STXXL_BEGIN_NAMESPACE
 //! \addtogroup stlcont
 //! \{
 
-//! \brief External sequence or deque container without random access. \n
+//! External sequence or deque container without random access. \n
 //! <b> Introduction </b> to sequence container: see \ref tutorial_sequence tutorial. \n
 //! <b> Design and Internals </b> of sequence container: see \ref design_queue
 
@@ -123,11 +122,11 @@ public:
     //! \param D  number of parallel disks, defaulting to the configured number of scratch disks,
     //!           memory consumption will be 2 * D + 2 blocks
     //!           (first and last block, D blocks as write cache, D block for prefetching)
-    explicit sequence(int_type D = -1) :
-        m_size(0),
-        m_owns_pool(true),
-        m_alloc_count(0),
-        m_bm(block_manager::get_instance())
+    explicit sequence(int_type D = -1)
+        : m_size(0),
+          m_owns_pool(true),
+          m_alloc_count(0),
+          m_bm(block_manager::get_instance())
     {
         if (D < 1) D = config::get_instance()->disks_number();
         STXXL_VERBOSE_SEQUENCE("sequence[" << this << "]::sequence(D)");
@@ -141,11 +140,11 @@ public:
     //! \param p_pool_size  number of blocks in the prefetch pool, recommended at least 1
     //! \param blocks2prefetch  defines the number of blocks to prefetch (\c front side),
     //!                          default is number of block in the prefetch pool
-    explicit sequence(unsigned_type w_pool_size, unsigned_type p_pool_size, int blocks2prefetch = -1) :
-        m_size(0),
-        m_owns_pool(true),
-        m_alloc_count(0),
-        m_bm(block_manager::get_instance())
+    explicit sequence(unsigned_type w_pool_size, unsigned_type p_pool_size, int blocks2prefetch = -1)
+        : m_size(0),
+          m_owns_pool(true),
+          m_alloc_count(0),
+          m_bm(block_manager::get_instance())
     {
         STXXL_VERBOSE_SEQUENCE("sequence[" << this << "]::sequence(sizes)");
         m_pool = new pool_type(p_pool_size, w_pool_size);
@@ -158,12 +157,12 @@ public:
     //! \param blocks2prefetch  defines the number of blocks to prefetch (\c front side), default is number of blocks in the prefetch pool
     //!  \warning Number of blocks in the write pool must be at least 2, recommended at least 3
     //!  \warning Number of blocks in the prefetch pool recommended at least 1
-    sequence(pool_type& pool, int blocks2prefetch = -1) :
-        m_size(0),
-        m_owns_pool(false),
-        m_pool(&pool),
-        m_alloc_count(0),
-        m_bm(block_manager::get_instance())
+    sequence(pool_type& pool, int blocks2prefetch = -1)
+        : m_size(0),
+          m_owns_pool(false),
+          m_pool(&pool),
+          m_alloc_count(0),
+          m_bm(block_manager::get_instance())
     {
         STXXL_VERBOSE_SEQUENCE("sequence[" << this << "]::sequence(pool)");
         init(blocks2prefetch);
@@ -562,7 +561,7 @@ public:
 
     //! \}
 
-    /********************************************************************************/
+    /**************************************************************************/
 
     class stream
     {
@@ -693,7 +692,7 @@ public:
 
     //! \}
 
-    /********************************************************************************/
+    /**************************************************************************/
 
     class reverse_stream
     {
diff --git a/include/stxxl/bits/containers/sorter.h b/include/stxxl/bits/containers/sorter.h
index d1cd17d..013ac5f 100644
--- a/include/stxxl/bits/containers/sorter.h
+++ b/include/stxxl/bits/containers/sorter.h
@@ -30,8 +30,7 @@ STXXL_BEGIN_NAMESPACE
 //! <b> Design and Internals </b> of sorter container: see \ref design_sorter
 
 /**
- * \brief External Sorter: use stream package objects to keep a sorted
- * container.
+ * External Sorter: use stream package objects to keep a sorted container.
  *
  * This sorter container combines the two functions of runs_creator and
  * runs_merger from the stream packages into a two-phase container.
@@ -84,6 +83,9 @@ public:
     typedef stream::runs_merger<typename runs_creator_type::sorted_runs_type,
                                 cmp_type, alloc_strategy_type> runs_merger_type;
 
+    //! size type
+    typedef typename runs_merger_type::size_type size_type;
+
 protected:
     // *** Object Attributes
 
@@ -100,7 +102,6 @@ public:
     //! \name Constructors
     //! \{
 
-
     //! Constructor allocation memory_to_use bytes in ram for sorted runs.
     sorter(const cmp_type& cmp, unsigned_type memory_to_use)
         : m_state(STATE_INPUT),
@@ -230,7 +231,7 @@ public:
     //! \{
 
     //! Number of items pushed or items remaining to be read.
-    unsigned_type size() const
+    size_type size() const
     {
         if (m_state == STATE_INPUT)
             return m_runs_creator.size();
diff --git a/include/stxxl/bits/containers/stack.h b/include/stxxl/bits/containers/stack.h
index 8f6430b..76091fa 100644
--- a/include/stxxl/bits/containers/stack.h
+++ b/include/stxxl/bits/containers/stack.h
@@ -28,7 +28,6 @@
 #include <stxxl/bits/mng/write_pool.h>
 #include <stxxl/bits/mng/prefetch_pool.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \defgroup stlcont_stack stack
@@ -40,7 +39,7 @@ template <class ValueType,
           unsigned BlocksPerPage = 4,
           unsigned BlockSize = STXXL_DEFAULT_BLOCK_SIZE(ValueType),
           class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY,
-          class SizeType = stxxl::int64>
+          class SizeType = stxxl::uint64>
 struct stack_config_generator
 {
     typedef ValueType value_type;
@@ -50,7 +49,6 @@ struct stack_config_generator
     typedef SizeType size_type;
 };
 
-
 //! External stack container.
 //! <b> Introduction </b> to stack container: see \ref tutorial_stack tutorial. \n
 //! <b> Design and Internals </b> of stack container: see \ref design_stack
@@ -93,14 +91,14 @@ public:
     //! \{
 
     //! Default constructor: creates empty stack.
-    normal_stack() :
-        m_size(0),
-        cache_offset(0),
-        current_element(NULL),
-        cache(blocks_per_page * 2),
-        front_page(cache.begin() + blocks_per_page),
-        back_page(cache.begin()),
-        bids(0)
+    normal_stack()
+        : m_size(0),
+          cache_offset(0),
+          current_element(NULL),
+          cache(blocks_per_page * 2),
+          front_page(cache.begin() + blocks_per_page),
+          back_page(cache.begin()),
+          bids(0)
     {
         bids.reserve(blocks_per_page);
     }
@@ -128,31 +126,30 @@ public:
     //! Copy-construction from a another stack of any type.
     //! \param stack_ stack object (could be external or internal, important is that it must
     //! have a copy constructor, \c top() and \c pop() methods )
-    template <class stack_type>
-    normal_stack(const stack_type& stack_) :
-        m_size(0),
-        cache_offset(0),
-        current_element(NULL),
-        cache(blocks_per_page * 2),
-        front_page(cache.begin() + blocks_per_page),
-        back_page(cache.begin()),
-        bids(0)
+    template <class StackType>
+    normal_stack(const StackType& stack_)
+        : m_size(0),
+          cache_offset(0),
+          current_element(NULL),
+          cache(blocks_per_page * 2),
+          front_page(cache.begin() + blocks_per_page),
+          back_page(cache.begin()),
+          bids(0)
     {
         bids.reserve(blocks_per_page);
 
-        stack_type stack_copy = stack_;
-        const size_type sz = stack_copy.size();
-        size_type i;
+        StackType stack_copy = stack_;
+        size_t sz = stack_copy.size();
 
         std::vector<value_type> tmp(sz);
 
-        for (i = 0; i < sz; ++i)
-        {
+        for (size_t i = 0; i < sz; ++i) {
             tmp[sz - i - 1] = stack_copy.top();
             stack_copy.pop();
         }
-        for (i = 0; i < sz; ++i)
-            this->push(tmp[i]);
+
+        for (size_t i = 0; i < sz; ++i)
+            push(tmp[i]);
     }
 
     virtual ~normal_stack()
@@ -221,7 +218,6 @@ public:
                 requests[i] = (back_page + i)->write(*cur_bid);
             }
 
-
             std::swap(back_page, front_page);
 
             bids.reserve(bids.size() + blocks_per_page);
@@ -292,13 +288,11 @@ private:
         if (offset < blocks_per_page * block_type::size)
             return &((*(back_page + offset / block_type::size))[offset % block_type::size]);
 
-
         unsigned_type unbiased_offset = offset - blocks_per_page * block_type::size;
         return &((*(front_page + unbiased_offset / block_type::size))[unbiased_offset % block_type::size]);
     }
 };
 
-
 //! Efficient implementation that uses prefetching and overlapping using internal buffers.
 //!
 //! Use it if your access pattern consists of many repeated push'es and pop's
@@ -339,15 +333,15 @@ public:
     //! \{
 
     //! Default constructor: creates empty stack.
-    grow_shrink_stack() :
-        m_size(0),
-        cache_offset(0),
-        current_element(NULL),
-        cache(blocks_per_page * 2),
-        cache_buffers(cache.begin()),
-        overlap_buffers(cache.begin() + blocks_per_page),
-        requests(blocks_per_page),
-        bids(0)
+    grow_shrink_stack()
+        : m_size(0),
+          cache_offset(0),
+          current_element(NULL),
+          cache(blocks_per_page * 2),
+          cache_buffers(cache.begin()),
+          overlap_buffers(cache.begin() + blocks_per_page),
+          requests(blocks_per_page),
+          bids(0)
     {
         bids.reserve(blocks_per_page);
     }
@@ -378,32 +372,32 @@ public:
     //! Copy-construction from a another stack of any type.
     //! \param stack_ stack object (could be external or internal, important is that it must
     //! have a copy constructor, \c top() and \c pop() methods )
-    template <class stack_type>
-    grow_shrink_stack(const stack_type& stack_) :
-        m_size(0),
-        cache_offset(0),
-        current_element(NULL),
-        cache(blocks_per_page * 2),
-        cache_buffers(cache.begin()),
-        overlap_buffers(cache.begin() + blocks_per_page),
-        requests(blocks_per_page),
-        bids(0)
+    template <class StackType>
+    grow_shrink_stack(const StackType& stack_)
+        : m_size(0),
+          cache_offset(0),
+          current_element(NULL),
+          cache(blocks_per_page * 2),
+          cache_buffers(cache.begin()),
+          overlap_buffers(cache.begin() + blocks_per_page),
+          requests(blocks_per_page),
+          bids(0)
     {
         bids.reserve(blocks_per_page);
 
-        stack_type stack_copy = stack_;
-        const size_type sz = stack_copy.size();
-        size_type i;
+        StackType stack_copy = stack_;
+        size_t sz = stack_copy.size();
 
         std::vector<value_type> tmp(sz);
 
-        for (i = 0; i < sz; ++i)
+        for (size_t i = 0; i < sz; ++i)
         {
             tmp[sz - i - 1] = stack_copy.top();
             stack_copy.pop();
         }
-        for (i = 0; i < sz; ++i)
-            this->push(tmp[i]);
+
+        for (size_t i = 0; i < sz; ++i)
+            push(tmp[i]);
     }
     virtual ~grow_shrink_stack()
     {
@@ -512,7 +506,6 @@ public:
             if (requests[0].get())
                 wait_all(requests.begin(), blocks_per_page);
 
-
             std::swap(cache_buffers, overlap_buffers);
 
             if (bids.size() > blocks_per_page)
@@ -582,9 +575,8 @@ public:
     //! read_write_pool for prefetching and buffered writing.
     //! \param pool_ block write/prefetch pool
     //! \param prefetch_aggressiveness number of blocks that will be used from prefetch pool
-    grow_shrink_stack2(
-        pool_type& pool_,
-        unsigned_type prefetch_aggressiveness = 0)
+    grow_shrink_stack2(pool_type& pool_,
+                       unsigned_type prefetch_aggressiveness = 0)
         : m_size(0),
           cache_offset(0),
           cache(new block_type),
@@ -604,17 +596,16 @@ public:
     //! \param w_pool_ write pool, that will be used for block writing
     //! \param prefetch_aggressiveness number of blocks that will be used from prefetch pool
     STXXL_DEPRECATED(
-        grow_shrink_stack2(
-            prefetch_pool<block_type>& p_pool_,
-            write_pool<block_type>& w_pool_,
-            unsigned_type prefetch_aggressiveness = 0)
-        ) :
-        m_size(0),
-        cache_offset(0),
-        cache(new block_type),
-        pref_aggr(prefetch_aggressiveness),
-        owned_pool(new pool_type(p_pool_, w_pool_)),
-        pool(owned_pool)
+        grow_shrink_stack2(prefetch_pool<block_type>& p_pool_,
+                           write_pool<block_type>& w_pool_,
+                           unsigned_type prefetch_aggressiveness = 0)
+        )
+        : m_size(0),
+          cache_offset(0),
+          cache(new block_type),
+          pref_aggr(prefetch_aggressiveness),
+          owned_pool(new pool_type(p_pool_, w_pool_)),
+          pool(owned_pool)
     {
         STXXL_VERBOSE2("grow_shrink_stack2::grow_shrink_stack2(...)");
     }
@@ -817,7 +808,6 @@ private:
     }
 };
 
-
 //! A stack that migrates from internal memory to external when its size exceeds a certain threshold.
 //!
 //! For semantics of the methods see documentation of the STL \c std::stack.
@@ -846,8 +836,8 @@ private:
 
     //! Copy-construction from a another stack of any type.
     //! \warning not implemented yet!
-    template <class stack_type>
-    migrating_stack(const stack_type& stack_);
+    template <class StackType>
+    migrating_stack(const StackType& stack_);
 
 public:
     //! \name Constructors/Destructors
@@ -971,7 +961,7 @@ public:
 enum stack_externality { external, migrating, internal };
 enum stack_behaviour { normal, grow_shrink, grow_shrink2 };
 
-//! \brief Stack type generator \n
+//! Stack type generator \n
 //! <b> Introduction </b> to stack container: see \ref tutorial_stack tutorial. \n
 //! <b> Design and Internals </b> of stack container: see \ref design_stack.
 //!
@@ -1001,7 +991,7 @@ enum stack_behaviour { normal, grow_shrink, grow_shrink2 };
 //!
 //! \tparam AllocStr one of allocation strategies: striping, RC, SR, or FR. Default is \b RC.
 //!
-//! \tparam SizeType size type, default is \b stxxl::int64.
+//! \tparam SizeType size type, default is \b stxxl::uint64.
 //!
 //! The configured stack type is available as STACK_GENERATOR<>::result.
 //!
@@ -1016,7 +1006,7 @@ template <
     unsigned_type MigrCritSize = (2* BlocksPerPage* BlockSize),
 
     class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY,
-    class SizeType = stxxl::int64
+    class SizeType = stxxl::uint64
     >
 class STACK_GENERATOR
 {
@@ -1037,7 +1027,6 @@ public:
 
 STXXL_END_NAMESPACE
 
-
 namespace std {
 
 template <class StackConfig>
diff --git a/include/stxxl/bits/containers/unordered_map.h b/include/stxxl/bits/containers/unordered_map.h
new file mode 100644
index 0000000..41d5e25
--- /dev/null
+++ b/include/stxxl/bits/containers/unordered_map.h
@@ -0,0 +1,498 @@
+/***************************************************************************
+ *  include/stxxl/bits/containers/unordered_map.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2008 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_CONTAINERS_UNORDERED_MAP_HEADER
+#define STXXL_CONTAINERS_UNORDERED_MAP_HEADER
+
+#include <stxxl/bits/noncopyable.h>
+#include <stxxl/bits/containers/hash_map/hash_map.h>
+
+STXXL_BEGIN_NAMESPACE
+
+namespace hash_map {
+
+template <
+    class KeyType,
+    class DataType,
+    class HashType,
+    class CompareType,
+    unsigned SubBlockSize,
+    unsigned SubBlocksPerBlock,
+    class Alloc
+    >
+class hash_map;
+
+} // namespace hash_map
+
+//! \addtogroup stlcont
+//! \{
+
+/*!
+ * An external memory implementation of the STL unordered_map container, which
+ * is based on an external memory hash map. For more information see \ref
+ * tutorial_unordered_map.
+ *
+ * \tparam KeyType the key type
+ * \tparam MappedType the mapped type associated with a key
+ * \tparam HashType a hash functional
+ * \tparam CompareType a less comparison relation for KeyType
+ * \tparam SubBlockSize the raw size of a subblock (caching granularity)
+ * (default: 8192)
+ * \tparam SubBlocksPerBlock the number of subblocks per external block
+ * (default: 256 -> 2MB blocks)
+ * \tparam AllocType allocator for internal-memory buffer
+ */
+template <
+    class KeyType,
+    class MappedType,
+    class HashType,
+    class CompareType,
+    unsigned SubBlockSize = 8* 1024,
+    unsigned SubBlocksPerBlock = 256,
+    class AllocType = std::allocator<std::pair<const KeyType, MappedType> >
+    >
+class unordered_map : private noncopyable
+{
+    typedef hash_map::hash_map<KeyType, MappedType, HashType, CompareType,
+                               SubBlockSize, SubBlocksPerBlock, AllocType> impl_type;
+
+    impl_type impl;
+
+public:
+    //! \name Types
+    //! \{
+
+    //! the first template parameter (Key)
+    typedef typename impl_type::key_type key_type;
+    //! the second template parameter (T)
+    typedef typename impl_type::mapped_type mapped_type;
+    //! pair<const key_type,mapped_type>
+    typedef typename impl_type::value_type value_type;
+    //! the third template parameter (HashType)
+    typedef typename impl_type::hasher hasher;
+    //! the fourth template parameter (CompareType) (!!! not: equality compare)
+    typedef typename impl_type::key_compare key_compare;
+    //! the seventh template parameter (AllocType)
+    typedef AllocType allocator_type;
+
+    typedef typename impl_type::reference reference;
+    typedef typename impl_type::const_reference const_reference;
+    typedef typename impl_type::pointer pointer;
+    typedef typename impl_type::const_pointer const_pointer;
+
+    typedef typename impl_type::external_size_type size_type;
+    typedef typename impl_type::difference_type difference_type;
+
+    typedef typename impl_type::external_size_type external_size_type;
+    typedef typename impl_type::internal_size_type internal_size_type;
+
+    typedef typename impl_type::iterator iterator;
+    typedef typename impl_type::const_iterator const_iterator;
+
+    //! constructed equality predicate for key
+    typedef typename impl_type::key_equal key_equal;
+
+    //! \}
+
+    //! \name Constructors
+    //! \{
+
+    /*!
+     * Construct a new hash-map
+     *
+     * \param n initial number of buckets
+     * \param hf hash-function
+     * \param cmp comparator-object
+     * \param buffer_size size of internal-memory buffer in bytes
+     * \param a allocator for internal-memory buffer
+     */
+    unordered_map(internal_size_type n = 0,
+                  const hasher& hf = hasher(),
+                  const key_compare& cmp = key_compare(),
+                  internal_size_type buffer_size = 100*1024*1024,
+                  const allocator_type& a = allocator_type())
+        : impl(n, hf, cmp, buffer_size, a)
+    { }
+
+    /*!
+     * Construct a new hash-map and insert all values in the range [begin,end)
+     *
+     * \param begin beginning of the range
+     * \param end end of the range
+     * \param mem_to_sort internal memory that may be used for
+     * bulk-construction (not to be confused with the buffer-memory)
+     * \param n initial number of buckets
+     * \param hf hash-function
+     * \param cmp comparator-object
+     * \param buffer_size size of internal-memory buffer in bytes
+     * \param a allocator for internal-memory buffer
+     */
+    template <class InputIterator>
+    unordered_map(InputIterator begin, InputIterator end,
+                  internal_size_type mem_to_sort = 256*1024*1024,
+                  internal_size_type n = 0,
+                  const hasher& hf = hasher(),
+                  const key_compare& cmp = key_compare(),
+                  internal_size_type buffer_size = 100*1024*1024,
+                  const allocator_type& a = allocator_type())
+        : impl(begin, end, mem_to_sort, n, hf, cmp, buffer_size, a)
+    { }
+
+    //! \}
+
+    //! \name Size and Capacity
+    //! \{
+
+    //! Number of values currently stored. Note: If the correct number is
+    //! currently unknown (because **oblivious-methods** were used), external
+    //! memory will be scanned.
+    external_size_type size() const
+    {
+        return impl.size();
+    }
+
+    //! The hash-map may store up to this number of values
+    external_size_type max_size() const
+    {
+        return impl.max_size();
+    }
+
+    //! Check if container is empty, see size() about oblivious-methods.
+    bool empty() const
+    {
+        return impl.empty();
+    }
+
+    //! \}
+
+    //! \name Iterators
+    //! \{
+
+    //! iterator pointing to the beginning of the hash-map
+    iterator begin()
+    {
+        return impl.begin();
+    }
+
+    //! iterator pointing to the end of the hash-map (iterator-type as
+    //! template-parameter)
+    iterator end()
+    {
+        return impl.end();
+    }
+
+    //! iterator pointing to the beginning of the hash-map
+    const_iterator begin() const
+    {
+        return impl.begin();
+    }
+
+    //! iterator pointing to the end of the hash-map (iterator-type as
+    //! template-parameter)
+    const_iterator end() const
+    {
+        return impl.end();
+    }
+
+    //! \}
+
+    //! \name Lookup and Element Access
+    //! \{
+
+    //! Convenience operator to quickly insert or find values. Use with caution
+    //! since using this operator will check external-memory.
+    mapped_type& operator [] (const key_type& key)
+    {
+        return impl[key];
+    }
+
+    //! Look up value by key. Non-const access.
+    //! \param key key for value to look up
+    iterator find(const key_type& key)
+    {
+        return impl.find(key);
+    }
+
+    //! Look up value by key. Const access.
+    //! \param key key for value to look up
+    const_iterator find(const key_type& key) const
+    {
+        return impl.find(key);
+    }
+
+    //! Number of values with given key
+    //! \param key key for value to look up
+    //! \return 0 or 1 depending on the presence of a value with the given key
+    external_size_type count(const key_type& key) const
+    {
+        return impl.count(key);
+    }
+
+    //! Finds a range containing all values with given key. Non-const access
+    //! \param key key to look for
+    //! \return range may be empty or contains exactly one value
+    std::pair<iterator, iterator>
+    equal_range(const key_type& key)
+    {
+        return impl.equal_range(key);
+    }
+
+    //! Finds a range containing all values with given key. Const access
+    //! \param key key to look for
+    //! \return range may be empty or contains exactly one value
+    std::pair<const_iterator, const_iterator>
+    equal_range(const key_type& key) const
+    {
+        return impl.equal_range(key);
+    }
+
+    //! \}
+
+    //! \name Modifiers: Insert
+    //! \{
+
+    /*!
+     * Insert a new value if no value with the same key is already present;
+     * external memory must therefore be accessed
+     *
+     * \param value what to insert
+     * \return a tuple whose second part is true iff the value was actually
+     * added (no value with the same key present); the first part is an
+     * iterator pointing to the newly inserted or already stored value
+     */
+    std::pair<iterator, bool> insert(const value_type& value)
+    {
+        return impl.insert(value);
+    }
+
+    //! Insert a value; external memory is not accessed so that another value
+    //! with the same key may be overwritten
+    //! \param value what to insert
+    //! \return iterator pointing to the inserted value
+    iterator insert_oblivious(const value_type& value)
+    {
+        return impl.insert_oblivious(value);
+    }
+
+    //! Bulk-insert of values in the range [first, last)
+    //! \param first beginning of the range
+    //! \param last end of the range
+    //! \param mem internal memory that may be used (note: this memory will be
+    //! used additionally to the buffer). The more the better
+    template <class InputIterator>
+    void insert(InputIterator first, InputIterator last, internal_size_type mem)
+    {
+        impl.insert(first, last, mem);
+    }
+
+    //! \}
+
+    //! \name Modifiers: Erase
+    //! \{
+
+    //! Erase value by iterator
+    //! \param it iterator pointing to the value to erase
+    void erase(const_iterator it)
+    {
+        impl.erase(it);
+    }
+
+    //! Erase value by key; check external memory
+    //! \param key key of value to erase
+    //! \return number of values actually erased (0 or 1)
+    external_size_type erase(const key_type& key)
+    {
+        return impl.erase(key);
+    }
+
+    //! Erase value by key but without looking at external memory
+    //! \param key key for value to release
+    void erase_oblivious(const key_type& key)
+    {
+        impl.erase_oblivious(key);
+    }
+
+    //! Reset hash-map: erase all values, invalidate all iterators
+    void clear()
+    {
+        impl.clear();
+    }
+
+    //! Exchange stored values with another hash-map
+    //! \param obj hash-map to swap values with
+    void swap(unordered_map& obj)
+    {
+        std::swap(impl, obj.impl);
+    }
+
+    //! \}
+
+    //! \name Bucket Interface
+    //! \{
+
+    //! Number of buckets
+    internal_size_type bucket_count() const
+    {
+        return impl.bucket_count();
+    }
+
+    //! Maximum number of buckets
+    internal_size_type max_bucket_count() const
+    {
+        return impl.max_bucket_count();
+    }
+
+    // bucket_size()?
+
+    //! Bucket-index for values with given key.
+    internal_size_type bucket(const key_type& k) const
+    {
+        return impl.bucket_index(k);
+    }
+
+    //! \}
+
+    //! \name Hash Policy
+    //! \{
+
+    //! Average number of (sub)blocks occupied by a bucket.
+    float load_factor() const
+    {
+        return impl.load_factor();
+    }
+
+    //! Get desired load-factor
+    float opt_load_factor() const
+    {
+        return impl.opt_load_factor();
+    }
+
+    //! Set desired load-factor
+    void opt_load_factor(float z)
+    {
+        impl.opt_load_factor(z);
+    }
+
+    //! Rehash with (at least) n buckets
+    void rehash(internal_size_type n)
+    {
+        impl.rehash(n);
+    }
+
+    //! \}
+
+    //! \name Observers
+    //! \{
+
+    //! Hash-function used by this hash-map
+    hasher hash_function() const
+    {
+        return impl.hash_function();
+    }
+
+    //! Strict-weak-ordering used by this hash-map
+    key_compare key_comp() const
+    {
+        return impl.key_cmp();
+    }
+
+    //! Constructed equality predicate used by this hash-map
+    key_equal key_eq() const
+    {
+        return impl.key_eq();
+    }
+
+    //! Get node memory allocator
+    allocator_type get_allocator() const
+    {
+        return impl.get_allocator();
+    }
+
+    //! \}
+
+    //! \name Internal Memory Buffer Policy
+    //! \{
+
+    //! Number of bytes occupied by buffer
+    internal_size_type buffer_size() const
+    {
+        return impl.buffer_size();
+    }
+
+    //! Maximum buffer size in bytes
+    internal_size_type max_buffer_size() const
+    {
+        return impl.max_buffer_size();
+    }
+
+    //! Set maximum buffer size
+    //! \param buffer_size new size in bytes
+    void max_buffer_size(internal_size_type buffer_size)
+    {
+        impl.max_buffer_size(buffer_size);
+    }
+
+    //! \}
+
+    //! \name Statistics
+    //! \{
+
+    //! Reset hash-map statistics
+    void reset_statistics()
+    {
+        impl.reset_statistics();
+    }
+
+    //! Print short general statistics to output stream
+    void print_statistics(std::ostream& o = std::cout) const
+    {
+        impl.print_statistics(o);
+    }
+
+    //! Even more statistics: Number of buckets, number of values, buffer-size,
+    //! values per bucket
+    void print_load_statistics(std::ostream& o = std::cout) const
+    {
+        impl.print_load_statistics(o);
+    }
+
+    //! \}
+};
+
+//! \}
+
+STXXL_END_NAMESPACE
+
+namespace std {
+
+template <
+    class KeyType,
+    class MappedType,
+    class HashType,
+    class CompareType,
+    unsigned SubBlockSize,
+    unsigned SubBlocksPerBlock,
+    class AllocType
+    >
+void swap(stxxl::unordered_map<KeyType, MappedType, HashType, CompareType,
+                               SubBlockSize, SubBlocksPerBlock, AllocType>& a,
+          stxxl::unordered_map<KeyType, MappedType, HashType, CompareType,
+                               SubBlockSize, SubBlocksPerBlock, AllocType>& b
+          )
+{
+    a.swap(b);
+}
+
+} // namespace std
+
+#endif // !STXXL_CONTAINERS_UNORDERED_MAP_HEADER
diff --git a/include/stxxl/bits/containers/vector.h b/include/stxxl/bits/containers/vector.h
index 01cbd68..1c30e0b 100644
--- a/include/stxxl/bits/containers/vector.h
+++ b/include/stxxl/bits/containers/vector.h
@@ -31,7 +31,6 @@
 #include <stxxl/bits/mng/buf_istream_reverse.h>
 #include <stxxl/bits/mng/buf_ostream.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 #define STXXL_VERBOSE_VECTOR(msg) STXXL_VERBOSE1("vector[" << static_cast<const void*>(this) << "]::" << msg)
@@ -40,15 +39,16 @@ STXXL_BEGIN_NAMESPACE
 //! \ingroup stllayer
 //! Containers with STL-compatible interface
 
-
 //! \defgroup stlcont_vector vector
 //! \ingroup stlcont
 //! Vector and support classes
 //! \{
 
-template <typename size_type, size_type modulo2, size_type modulo1>
+template <typename SizeType, SizeType modulo2, SizeType modulo1>
 class double_blocked_index
 {
+    typedef SizeType size_type;
+
     static const size_type modulo12 = modulo1 * modulo2;
 
     size_type pos;
@@ -274,10 +274,14 @@ template <typename ValueType, typename AllocStr, typename SizeType, typename Dif
           unsigned BlockSize, typename PagerType, unsigned PageSize>
 class vector_iterator
 {
-    typedef vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> self_type;
-    typedef const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> const_self_type;
+    typedef vector_iterator<ValueType, AllocStr, SizeType,
+                            DiffType, BlockSize, PagerType, PageSize> self_type;
+
+    typedef const_vector_iterator<ValueType, AllocStr, SizeType,
+                                  DiffType, BlockSize, PagerType, PageSize> const_self_type;
 
-    friend class const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>;
+    friend class const_vector_iterator<ValueType, AllocStr, SizeType,
+                                       DiffType, BlockSize, PagerType, PageSize>;
 
 public:
     //! \name Types
@@ -546,10 +550,14 @@ template <typename ValueType, typename AllocStr, typename SizeType, typename Dif
           unsigned BlockSize, typename PagerType, unsigned PageSize>
 class const_vector_iterator
 {
-    typedef const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> self_type;
-    typedef vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> mutable_self_type;
+    typedef const_vector_iterator<ValueType, AllocStr, SizeType, DiffType,
+                                  BlockSize, PagerType, PageSize> self_type;
 
-    friend class vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>;
+    typedef vector_iterator<ValueType, AllocStr, SizeType, DiffType,
+                            BlockSize, PagerType, PageSize> mutable_self_type;
+
+    friend class vector_iterator<ValueType, AllocStr, SizeType, DiffType,
+                                 BlockSize, PagerType, PageSize>;
 
 public:
     //! \name Types
@@ -793,7 +801,7 @@ public:
 
 ////////////////////////////////////////////////////////////////////////////
 
-//! \brief External vector container. \n
+//! External vector container. \n
 //! <b>Introduction</b> to vector container: see \ref tutorial_vector tutorial. \n
 //! <b>Design and Internals</b> of vector container: see \ref design_vector
 //!
@@ -1038,7 +1046,7 @@ public:
         return size_type(m_bids.size()) * block_type::raw_size;
     }
 
-    /*! \brief Reserves at least n elements in external memory.
+    /*! Reserves at least n elements in external memory.
      *
      * If n is less than or equal to capacity(), this call has no
      * effect. Otherwise, it is a request for allocation of additional \b
@@ -1122,7 +1130,7 @@ private:
     void _resize_shrink_capacity(size_type n)
     {
         unsigned_type old_bids_size = m_bids.size();
-        unsigned_type new_bids_size = div_ceil(n, block_type::size);
+        unsigned_type new_bids_size = (unsigned_type)div_ceil(n, block_type::size);
 
         if (new_bids_size > old_bids_size)
         {
@@ -1266,7 +1274,6 @@ public:
         for (unsigned_type i = 0; i < numpages(); ++i)
             m_free_slots.push(i);
 
-
         // allocate blocks equidistantly and in-order
         size_type offset = 0;
         for (bids_container_iterator it = m_bids.begin();
@@ -1706,7 +1713,9 @@ private:
 
     void block_externally_updated(size_type offset) const
     {
-        page_externally_updated(offset / (block_type::size * page_size));
+        page_externally_updated(
+            (unsigned_type)(offset / (block_type::size * page_size))
+            );
     }
 
     void block_externally_updated(const blocked_index_type& offset) const
@@ -1865,25 +1874,25 @@ inline bool operator >= (stxxl::vector<ValueType, PageSize, PagerType, BlockSize
 template <typename ValueType, typename AllocStr, typename SizeType, typename DiffType,
           unsigned BlockSize, typename PagerType, unsigned PageSize>
 bool is_sorted(
-    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> __first,
-    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> __last)
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> first,
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> last)
 {
     return is_sorted_helper(
-        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(__first),
-        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(__last));
+        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(first),
+        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(last));
 }
 
 template <typename ValueType, typename AllocStr, typename SizeType, typename DiffType,
-          unsigned BlockSize, typename PagerType, unsigned PageSize, typename _StrictWeakOrdering>
+          unsigned BlockSize, typename PagerType, unsigned PageSize, typename StrictWeakOrdering>
 bool is_sorted(
-    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> __first,
-    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> __last,
-    _StrictWeakOrdering __comp)
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> first,
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> last,
+    StrictWeakOrdering comp)
 {
     return is_sorted_helper(
-        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(__first),
-        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(__last),
-        __comp);
+        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(first),
+        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize>(last),
+        comp);
 }
 
 ////////////////////////////////////////////////////////////////////////////
@@ -1932,6 +1941,9 @@ public:
     //! construct an iterator for vector_bufreader (for C++11 range-based for loop)
     typedef vector_bufreader_iterator<vector_bufreader> bufreader_iterator;
 
+    //! size of remaining data
+    typedef typename vector_type::size_type size_type;
+
 protected:
     //! iterator to the beginning of the range.
     vector_iterator m_begin;
@@ -1956,7 +1968,8 @@ public:
     //! \param begin iterator to position were to start reading in vector
     //! \param end iterator to position were to end reading in vector
     //! \param nbuffers number of buffers used for overlapped I/O (>= 2*D recommended)
-    vector_bufreader(vector_iterator begin, vector_iterator end, unsigned_type nbuffers = 0)
+    vector_bufreader(vector_iterator begin, vector_iterator end,
+                     unsigned_type nbuffers = 0)
         : m_begin(begin), m_end(end),
           m_bufin(NULL),
           m_nbuffers(nbuffers)
@@ -2050,10 +2063,10 @@ public:
     }
 
     //! Return remaining size.
-    size_t size() const
+    size_type size() const
     {
         assert(m_begin <= m_iter && m_iter <= m_end);
-        return (m_end - m_iter);
+        return (size_type)(m_end - m_iter);
     }
 
     //! Returns true once the whole range has been read.
@@ -2196,6 +2209,9 @@ public:
     //! construct output buffered stream used for overlapped reading
     typedef buf_istream_reverse<block_type, bids_container_iterator> buf_istream_type;
 
+    //! size of remaining data
+    typedef typename vector_type::size_type size_type;
+
 protected:
     //! iterator to the beginning of the range.
     vector_iterator m_begin;
@@ -2217,7 +2233,8 @@ public:
     //! \param begin iterator to position were to start reading in vector
     //! \param end iterator to position were to end reading in vector
     //! \param nbuffers number of buffers used for overlapped I/O (>= 2*D recommended)
-    vector_bufreader_reverse(vector_iterator begin, vector_iterator end, unsigned_type nbuffers = 0)
+    vector_bufreader_reverse(vector_iterator begin, vector_iterator end,
+                             unsigned_type nbuffers = 0)
         : m_begin(begin), m_end(end),
           m_bufin(NULL),
           m_nbuffers(nbuffers)
@@ -2317,10 +2334,10 @@ public:
     }
 
     //! Return remaining size.
-    size_t size() const
+    size_type size() const
     {
         assert(m_begin <= m_iter && m_iter <= m_end);
-        return (m_iter - m_begin);
+        return (size_type)(m_iter - m_begin);
     }
 
     //! Returns true once the whole range has been read.
@@ -2443,7 +2460,12 @@ public:
             // iterator points to end of vector -> double vector's size
 
             if (m_bufout) {
-                m_bufout->flush(); // flush overlap buffers
+                // fixes issue with buf_ostream writing invalid blocks: when
+                // buf_ostream::current_elem advances to next block, flush()
+                // will write to block beyond bid().end.
+                if (m_iter.block_offset() != 0)
+                    m_bufout->flush(); // flushes overlap buffers
+
                 delete m_bufout;
                 m_bufout = NULL;
 
@@ -2558,7 +2580,7 @@ public:
 
 ////////////////////////////////////////////////////////////////////////////
 
-//! \brief External vector type generator.
+//! External vector type generator.
 //!
 //! \tparam ValueType element type of contained objects (POD with no references to internal memory)
 //! \tparam PageSize number of blocks in a page, default: \b 4 (recommended >= D)
diff --git a/include/stxxl/bits/defines.h b/include/stxxl/bits/defines.h
index 771fac1..b12d571 100644
--- a/include/stxxl/bits/defines.h
+++ b/include/stxxl/bits/defines.h
@@ -20,7 +20,7 @@
 //#define STXXL_HAVE_BOOSTFD_FILE 0/1
 //#define STXXL_HAVE_WINCALL_FILE 0/1
 //#define STXXL_HAVE_WBTL_FILE 0/1
-#define STXXL_HAVE_AIO_FILE 0  // only available on the kernelaio branch
+//#define STXXL_HAVE_LINUXAIO_FILE 0/1
 // default: 0/1 (platform and type dependent)
 // used in: io/*_file.h, io/*_file.cpp, mng/mng.cpp
 // affects: library
diff --git a/include/stxxl/bits/io/boostfd_file.h b/include/stxxl/bits/io/boostfd_file.h
index 24a4bc9..1fa8bb3 100644
--- a/include/stxxl/bits/io/boostfd_file.h
+++ b/include/stxxl/bits/io/boostfd_file.h
@@ -8,6 +8,7 @@
  *  Copyright (C) 2006 Roman Dementiev <dementiev at ira.uka.de>
  *  Copyright (C) 2008 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
  *  Copyright (C) 2009 Johannes Singler <singler at ira.uka.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -34,7 +35,6 @@
 
 #include <boost/iostreams/device/file_descriptor.hpp>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -46,18 +46,24 @@ class boostfd_file : public disk_queued_file
     typedef boost::iostreams::file_descriptor fd_type;
 
 protected:
-    mutex fd_mutex;        // sequentialize function calls involving file_des
-    fd_type file_des;
-    int mode_;
+    //! sequentialize function calls involving m_file_des
+    mutex m_fd_mutex;
+    fd_type m_file_des;
+    int m_mode;
     offset_type _size();
 
 public:
-    boostfd_file(const std::string& filename, int mode, int queue_id = DEFAULT_QUEUE, int allocator_id = NO_ALLOCATOR);
+    boostfd_file(
+        const std::string& filename, int mode,
+        int queue_id = DEFAULT_QUEUE,
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID);
     ~boostfd_file();
     offset_type size();
     void set_size(offset_type newsize);
     void lock();
-    void serve(const request* req) throw (io_error);
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
     const char * io_type() const;
 };
 
diff --git a/include/stxxl/bits/io/completion_handler.h b/include/stxxl/bits/io/completion_handler.h
index 0280254..83ce2a6 100644
--- a/include/stxxl/bits/io/completion_handler.h
+++ b/include/stxxl/bits/io/completion_handler.h
@@ -18,7 +18,7 @@
 
 #include <stxxl/bits/namespace.h>
 #include <stxxl/bits/compat/unique_ptr.h>
-
+#include <cstdlib>
 
 STXXL_BEGIN_NAMESPACE
 
@@ -32,70 +32,68 @@ public:
     virtual ~completion_handler_impl() { }
 };
 
-template <typename handler_type>
+template <typename HandlerType>
 class completion_handler1 : public completion_handler_impl
 {
 private:
-    handler_type handler_;
+    HandlerType m_handler;
 
 public:
-    completion_handler1(const handler_type& handler__) : handler_(handler__) { }
+    completion_handler1(const HandlerType& handler)
+        : m_handler(handler)
+    { }
     completion_handler1 * clone() const
     {
         return new completion_handler1(*this);
     }
     void operator () (request* req)
     {
-        handler_(req);
+        m_handler(req);
     }
 };
 
 //! Completion handler class (Loki-style).
 //!
-//! In some situations one needs to execute
-//! some actions after completion of an I/O
-//! request. In these cases one can use
-//! an I/O completion handler - a function
-//! object that can be passed as a parameter
-//! to asynchronous I/O calls \c stxxl::file::aread
-//! and \c stxxl::file::awrite .
+//! In some situations one needs to execute some actions after completion of an
+//! I/O request. In these cases one can use an I/O completion handler - a
+//! function object that can be passed as a parameter to asynchronous I/O calls
+//! \c stxxl::file::aread and \c stxxl::file::awrite .
 class completion_handler
 {
-    compat_unique_ptr<completion_handler_impl>::result sp_impl_;
+    compat_unique_ptr<completion_handler_impl>::result m_ptr;
 
 public:
-    completion_handler() :
-        sp_impl_(static_cast<completion_handler_impl*>(0))
+    //! Construct default, no operation completion handler.
+    completion_handler()
+        : m_ptr(static_cast<completion_handler_impl*>(NULL))
     { }
 
-    completion_handler(const completion_handler& obj) :
-        sp_impl_(obj.sp_impl_.get()->clone())
+    //! Copy constructor.
+    completion_handler(const completion_handler& obj)
+        : m_ptr(obj.m_ptr.get() ? obj.m_ptr.get()->clone() : NULL)
     { }
 
-    template <typename handler_type>
-    completion_handler(const handler_type& handler__) :
-        sp_impl_(new completion_handler1<handler_type>(handler__))
+    //! Construct a completion handler which calls some function.
+    template <typename HandlerType>
+    completion_handler(const HandlerType& handler)
+        : m_ptr(new completion_handler1<HandlerType>(handler))
     { }
 
+    //! Assignment operator
     completion_handler& operator = (const completion_handler& obj)
     {
-        sp_impl_.reset(obj.sp_impl_.get()->clone());
+        m_ptr.reset(obj.m_ptr.get() ? obj.m_ptr.get()->clone() : NULL);
         return *this;
     }
+
+    //! Call the enclosed completion handler.
     void operator () (request* req)
     {
-        (* sp_impl_)(req);
+        if (m_ptr.get())
+            (* m_ptr)(req);
     }
 };
 
-//! Default completion handler class.
-
-struct default_completion_handler
-{
-    //! An operator that does nothing.
-    void operator () (request*) { }
-};
-
 STXXL_END_NAMESPACE
 
 #endif // !STXXL_IO_COMPLETION_HANDLER_HEADER
diff --git a/include/stxxl/bits/io/disk_queued_file.h b/include/stxxl/bits/io/disk_queued_file.h
index 6a7ee0f..21b94f6 100644
--- a/include/stxxl/bits/io/disk_queued_file.h
+++ b/include/stxxl/bits/io/disk_queued_file.h
@@ -5,6 +5,7 @@
  *
  *  Copyright (C) 2008 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
  *  Copyright (C) 2009 Johannes Singler <singler at ira.uka.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -18,7 +19,6 @@
 #include <stxxl/bits/io/request.h>
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -29,30 +29,33 @@ class completion_handler;
 //! Implementation of some file methods based on serving_request.
 class disk_queued_file : public virtual file
 {
-    int queue_id, allocator_id;
+    int m_queue_id, m_allocator_id;
 
 public:
-    disk_queued_file(int queue_id, int allocator_id) : queue_id(queue_id), allocator_id(allocator_id)
+    disk_queued_file(int queue_id, int allocator_id)
+        : m_queue_id(queue_id), m_allocator_id(allocator_id)
     { }
+
     request_ptr aread(
         void* buffer,
         offset_type pos,
         size_type bytes,
-        const completion_handler& on_cmpl);
+        const completion_handler& on_cmpl = completion_handler());
+
     request_ptr awrite(
         void* buffer,
         offset_type pos,
         size_type bytes,
-        const completion_handler& on_cmpl);
+        const completion_handler& on_cmpl = completion_handler());
 
     virtual int get_queue_id() const
     {
-        return queue_id;
+        return m_queue_id;
     }
 
     virtual int get_allocator_id() const
     {
-        return allocator_id;
+        return m_allocator_id;
     }
 };
 
diff --git a/include/stxxl/bits/io/disk_queues.h b/include/stxxl/bits/io/disk_queues.h
index f656c45..9aa2312 100644
--- a/include/stxxl/bits/io/disk_queues.h
+++ b/include/stxxl/bits/io/disk_queues.h
@@ -6,6 +6,7 @@
  *  Copyright (C) 2002 Roman Dementiev <dementiev at mpi-sb.mpg.de>
  *  Copyright (C) 2008-2010 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
  *  Copyright (C) 2009 Johannes Singler <singler at ira.uka.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -22,11 +23,13 @@
 #include <stxxl/bits/io/iostats.h>
 #include <stxxl/bits/io/request.h>
 #include <stxxl/bits/io/request_queue_impl_qwqr.h>
-
+#include <stxxl/bits/io/linuxaio_queue.h>
+#include <stxxl/bits/io/linuxaio_request.h>
+#include <stxxl/bits/io/serving_request.h>
 
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
 //! Encapsulates disk queues.
@@ -35,11 +38,8 @@ class disk_queues : public singleton<disk_queues>
 {
     friend class singleton<disk_queues>;
 
-    // 2 queues: write queue and read queue
-    typedef request_queue_impl_qwqr request_queue_type;
-
     typedef stxxl::int64 DISKID;
-    typedef std::map<DISKID, request_queue_type*> request_queue_map;
+    typedef std::map<DISKID, request_queue*> request_queue_map;
 
 protected:
     request_queue_map queues;
@@ -54,12 +54,24 @@ public:
 #ifdef STXXL_HACK_SINGLE_IO_THREAD
         disk = 42;
 #endif
-        if (queues.find(disk) == queues.end())
+        request_queue_map::iterator qi = queues.find(disk);
+        request_queue* q;
+        if (qi == queues.end())
         {
             // create new request queue
-            queues[disk] = new request_queue_type();
+#if STXXL_HAVE_LINUXAIO_FILE
+            if (dynamic_cast<linuxaio_request*>(req.get()))
+                q = queues[disk] = new linuxaio_queue(
+                        dynamic_cast<linuxaio_file*>(req->get_file())->get_desired_queue_length()
+                        );
+            else
+#endif
+            q = queues[disk] = new request_queue_impl_qwqr();
         }
-        queues[disk]->add_request(req);
+        else
+            q = qi->second;
+
+        q->add_request(req);
     }
 
     //! Cancel a request.
@@ -81,6 +93,14 @@ public:
             return false;
     }
 
+    request_queue * get_queue(DISKID disk)
+    {
+        if (queues.find(disk) != queues.end())
+            return queues[disk];
+        else
+            return NULL;
+    }
+
     ~disk_queues()
     {
         // deallocate all queues
diff --git a/include/stxxl/bits/io/file.h b/include/stxxl/bits/io/file.h
index 7b3e6ed..1d9c141 100644
--- a/include/stxxl/bits/io/file.h
+++ b/include/stxxl/bits/io/file.h
@@ -6,7 +6,7 @@
  *  Copyright (C) 2002 Roman Dementiev <dementiev at mpi-sb.mpg.de>
  *  Copyright (C) 2008, 2010 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
  *  Copyright (C) 2008, 2009 Johannes Singler <singler at ira.uka.de>
- *  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+ *  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -27,7 +27,7 @@
 #include <string>
 
 #include <stxxl/bits/common/exceptions.h>
-#include <stxxl/bits/common/mutex.h>
+#include <stxxl/bits/common/counting_ptr.h>
 #include <stxxl/bits/common/types.h>
 #include <stxxl/bits/io/request.h>
 #include <stxxl/bits/io/request_interface.h>
@@ -37,12 +37,16 @@
 #include <stxxl/bits/unused.h>
 #include <stxxl/bits/verbose.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup iolayer
 //! \{
 
+//! \defgroup fileimpl File I/O Implementations
+//! Implementations of \c stxxl::file for various file access methods and
+//! operating systems.
+//! \{
+
 class completion_handler;
 
 //! Defines interface of file.
@@ -51,14 +55,6 @@ class completion_handler;
 //! base on various file systems or even remote storage interfaces
 class file : private noncopyable
 {
-    mutex request_ref_cnt_mutex;
-    int request_ref_cnt;
-
-protected:
-    //! Initializes file object.
-    //! \remark Called in implementations of file
-    file() : request_ref_cnt(0) { }
-
 public:
     //! the offset of a request, also the size of the file
     typedef request::offset_type offset_type;
@@ -79,52 +75,43 @@ public:
                              //!< Tries to open with appropriate flags, if fails print warning and open normally.
         TRUNC = 32,          //!< once file is opened its length becomes zero
         SYNC = 64,           //!< open the file with O_SYNC | O_DSYNC | O_RSYNC flags set
-        NO_LOCK = 128,       //!< do not aquire an exclusive lock by default
+        NO_LOCK = 128,       //!< do not acquire an exclusive lock by default
         REQUIRE_DIRECT = 256 //!< implies DIRECT, fail if opening with DIRECT flag does not work.
     };
 
     static const int DEFAULT_QUEUE = -1;
-    static const int NO_QUEUE = -2;
+    static const int DEFAULT_LINUXAIO_QUEUE = -2;
     static const int NO_ALLOCATOR = -1;
+    static const unsigned int DEFAULT_DEVICE_ID = (unsigned int)(-1);
+
+    //! Construct a new file, usually called by a subclass.
+    file(unsigned int device_id = DEFAULT_DEVICE_ID)
+        : m_device_id(device_id)
+    { }
 
     //! Schedules an asynchronous read request to the file.
     //! \param buffer pointer to memory buffer to read into
     //! \param pos file position to start read from
     //! \param bytes number of bytes to transfer
     //! \param on_cmpl I/O completion handler
-    //! \return \c request_ptr request object, which can be used to track the status of the operation
+    //! \return \c request_ptr request object, which can be used to track the
+    //! status of the operation
+
     virtual request_ptr aread(void* buffer, offset_type pos, size_type bytes,
-                              const completion_handler& on_cmpl) = 0;
+                              const completion_handler& on_cmpl = completion_handler()) = 0;
 
     //! Schedules an asynchronous write request to the file.
     //! \param buffer pointer to memory buffer to write from
     //! \param pos starting file position to write
     //! \param bytes number of bytes to transfer
     //! \param on_cmpl I/O completion handler
-    //! \return \c request_ptr request object, which can be used to track the status of the operation
+    //! \return \c request_ptr request object, which can be used to track the
+    //! status of the operation
     virtual request_ptr awrite(void* buffer, offset_type pos, size_type bytes,
-                               const completion_handler& on_cmpl) = 0;
-
-    virtual void serve(const request* req) throw (io_error) = 0;
-
-    void add_request_ref()
-    {
-        scoped_mutex_lock Lock(request_ref_cnt_mutex);
-        ++request_ref_cnt;
-    }
+                               const completion_handler& on_cmpl = completion_handler()) = 0;
 
-    void delete_request_ref()
-    {
-        scoped_mutex_lock Lock(request_ref_cnt_mutex);
-        assert(request_ref_cnt > 0);
-        --request_ref_cnt;
-    }
-
-    int get_request_nref()
-    {
-        scoped_mutex_lock Lock(request_ref_cnt_mutex);
-        return request_ref_cnt;
-    }
+    virtual void serve(void* buffer, offset_type offset, size_type bytes,
+                       request::request_type type) = 0;
 
     //! Changes the size of the file.
     //! \param newsize new file size
@@ -134,20 +121,14 @@ public:
     //! \return file size in bytes
     virtual offset_type size() = 0;
 
-    //! Returns the identifier of the file's queue.
-    //! \remark Files allocated on the same physical device usually share the same queue
-    //! \return queue number
+    //! Returns the identifier of the file's queue number.
+    //! \remark Files allocated on the same physical device usually share the
+    //! same queue, unless there is a common queue (e.g. with linuxaio).
     virtual int get_queue_id() const = 0;
 
-    //! Returns the file's allocator.
-    //! \return allocator number
+    //! Returns the file's disk allocator number
     virtual int get_allocator_id() const = 0;
 
-    virtual int get_physical_device_id() const
-    {
-        return get_queue_id();
-    }
-
     //! Locks file for reading and writing (acquires a lock in the file system).
     virtual void lock() = 0;
 
@@ -159,7 +140,8 @@ public:
         STXXL_UNUSED(size);
     }
 
-    virtual void export_files(offset_type offset, offset_type length, std::string prefix)
+    virtual void export_files(offset_type offset, offset_type length,
+                              std::string prefix)
     {
         STXXL_UNUSED(offset);
         STXXL_UNUSED(length);
@@ -171,16 +153,51 @@ public:
 
     virtual ~file()
     {
-        int nr = get_request_nref();
+        unsigned_type nr = get_request_nref();
         if (nr != 0)
-            STXXL_ERRMSG("stxxl::file is being deleted while there are still " << nr << " (unfinished) requests referencing it");
+            STXXL_ERRMSG("stxxl::file is being deleted while there are "
+                         "still " << nr << " (unfinished) requests "
+                         "referencing it");
     }
 
     //! Identifies the type of I/O implementation.
-    //! \return pointer to null terminated string of characters, containing the name of I/O implementation
-    virtual const char * io_type() const
+    //! \return pointer to null terminated string of characters, containing the
+    //! name of I/O implementation
+    virtual const char * io_type() const = 0;
+
+protected:
+    //! The file's physical device id (e.g. used for prefetching sequence
+    //! calculation)
+    unsigned int m_device_id;
+
+public:
+    //! Returns the file's physical device id
+    unsigned int get_device_id() const
+    {
+        return m_device_id;
+    }
+
+protected:
+    //! count the number of requests referencing this file
+    atomic_counted_object m_request_ref;
+
+public:
+    //! increment referenced requests
+    void add_request_ref()
+    {
+        m_request_ref.inc_reference();
+    }
+
+    //! decrement referenced requests
+    void delete_request_ref()
     {
-        return "none";
+        m_request_ref.dec_reference();
+    }
+
+    //! return number of referenced requests
+    unsigned_type get_request_nref()
+    {
+        return m_request_ref.get_reference_count();
     }
 
 public:
@@ -197,9 +214,11 @@ public:
     //! \}
 };
 
-//! \defgroup fileimpl File I/O Implementations
-//! Implementations of \c stxxl::file and \c stxxl::request
-//! for various file access methods
+//! \}
+
+//! \defgroup reqlayer I/O Requests and Queues
+//! Encapsulation of an I/O request, queues for requests and threads to process
+//! them.
 //! \{
 //! \}
 
diff --git a/include/stxxl/bits/io/fileperblock_file.h b/include/stxxl/bits/io/fileperblock_file.h
index 6780ae8..4dd1768 100644
--- a/include/stxxl/bits/io/fileperblock_file.h
+++ b/include/stxxl/bits/io/fileperblock_file.h
@@ -16,7 +16,6 @@
 #include <string>
 #include <stxxl/bits/io/disk_queued_file.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -46,11 +45,13 @@ public:
         const std::string& filename_prefix,
         int mode,
         int queue_id = DEFAULT_QUEUE,
-        int allocator_id = NO_ALLOCATOR);
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID);
 
     virtual ~fileperblock_file();
 
-    virtual void serve(const request* req) throw (io_error);
+    virtual void serve(void* buffer, offset_type offset, size_type bytes,
+                       request::request_type type);
 
     //! Changes the size of the file.
     //! \param new_size value of the new file size
diff --git a/include/stxxl/bits/io/io.h b/include/stxxl/bits/io/io.h
index 9a857bb..5c066f9 100644
--- a/include/stxxl/bits/io/io.h
+++ b/include/stxxl/bits/io/io.h
@@ -24,6 +24,7 @@
 #include <stxxl/bits/io/mem_file.h>
 #include <stxxl/bits/io/fileperblock_file.h>
 #include <stxxl/bits/io/wbtl_file.h>
+#include <stxxl/bits/io/linuxaio_file.h>
 #include <stxxl/bits/io/create_file.h>
 #include <stxxl/bits/io/disk_queues.h>
 #include <stxxl/bits/io/iostats.h>
@@ -32,13 +33,6 @@
 //! \c STXXL library namespace
 STXXL_BEGIN_NAMESPACE
 
-//! \defgroup iolayer I/O Primitives Layer
-//! Group of classes which enable abstraction from operating system calls and support
-//! system-independent interfaces for asynchronous I/O.
-//! \{
-
-//! \}
-
     STXXL_END_NAMESPACE
 
 #endif // !STXXL_IO_IO_HEADER
diff --git a/include/stxxl/bits/io/iostats.h b/include/stxxl/bits/io/iostats.h
index 8bbbab7..d9ea649 100644
--- a/include/stxxl/bits/io/iostats.h
+++ b/include/stxxl/bits/io/iostats.h
@@ -31,7 +31,6 @@
 #include <iostream>
 #include <string>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup iolayer
@@ -426,59 +425,66 @@ inline void stats::wait_started(wait_op_type) { }
 inline void stats::wait_finished(wait_op_type) { }
 #endif
 
-
 class stats_data
 {
-    unsigned reads, writes;                    // number of operations
-    int64 volume_read, volume_written;         // number of bytes read/written
-    unsigned c_reads, c_writes;                // number of cached operations
-    int64 c_volume_read, c_volume_written;     // number of bytes read/written from/to cache
-    double t_reads, t_writes;                  // seconds spent in operations
-    double p_reads, p_writes;                  // seconds spent in parallel operations
-    double p_ios;                              // seconds spent in all parallel I/O operations (read and write)
-    double t_wait;                             // seconds spent waiting for completion of I/O operations
-    double t_wait_read, t_wait_write;          //
+    //! number of operations
+    unsigned reads, writes;
+    //! number of bytes read/written
+    int64 volume_read, volume_written;
+    //! number of cached operations
+    unsigned c_reads, c_writes;
+    //! number of bytes read/written from/to cache
+    int64 c_volume_read, c_volume_written;
+    //! seconds spent in operations
+    double t_reads, t_writes;
+    //! seconds spent in parallel operations
+    double p_reads, p_writes;
+    //! seconds spent in all parallel I/O operations (read and write)
+    double p_ios;
+    //! seconds spent waiting for completion of I/O operations
+    double t_wait;
+    double t_wait_read, t_wait_write;
     double elapsed;
 
 public:
-    stats_data() :
-        reads(0),
-        writes(0),
-        volume_read(0),
-        volume_written(0),
-        c_reads(0),
-        c_writes(0),
-        c_volume_read(0),
-        c_volume_written(0),
-        t_reads(0.0),
-        t_writes(0.0),
-        p_reads(0.0),
-        p_writes(0.0),
-        p_ios(0.0),
-        t_wait(0.0),
-        t_wait_read(0.0),
-        t_wait_write(0.0),
-        elapsed(0.0)
+    stats_data()
+        : reads(0),
+          writes(0),
+          volume_read(0),
+          volume_written(0),
+          c_reads(0),
+          c_writes(0),
+          c_volume_read(0),
+          c_volume_written(0),
+          t_reads(0.0),
+          t_writes(0.0),
+          p_reads(0.0),
+          p_writes(0.0),
+          p_ios(0.0),
+          t_wait(0.0),
+          t_wait_read(0.0),
+          t_wait_write(0.0),
+          elapsed(0.0)
     { }
 
-    stats_data(const stats& s) :
-        reads(s.get_reads()),
-        writes(s.get_writes()),
-        volume_read(s.get_read_volume()),
-        volume_written(s.get_written_volume()),
-        c_reads(s.get_cached_reads()),
-        c_writes(s.get_cached_writes()),
-        c_volume_read(s.get_cached_read_volume()),
-        c_volume_written(s.get_cached_written_volume()),
-        t_reads(s.get_read_time()),
-        t_writes(s.get_write_time()),
-        p_reads(s.get_pread_time()),
-        p_writes(s.get_pwrite_time()),
-        p_ios(s.get_pio_time()),
-        t_wait(s.get_io_wait_time()),
-        t_wait_read(s.get_wait_read_time()),
-        t_wait_write(s.get_wait_write_time()),
-        elapsed(timestamp() - s.get_last_reset_time())
+    stats_data(const stats& s)
+        : reads(s.get_reads()),
+          writes(s.get_writes()),
+          volume_read(s.get_read_volume()),
+          volume_written(s.get_written_volume()),
+          c_reads(s.get_cached_reads()),
+          c_writes(s.get_cached_writes()),
+          c_volume_read(s.get_cached_read_volume()),
+          c_volume_written(s.get_cached_written_volume()),
+          t_reads(s.get_read_time()),
+          t_writes(s.get_write_time()),
+          p_reads(s.get_pread_time()),
+          p_writes(s.get_pwrite_time()),
+          p_ios(s.get_pio_time()),
+          t_wait(s.get_io_wait_time()),
+          t_wait_read(s.get_wait_read_time()),
+          t_wait_write(s.get_wait_write_time()),
+          elapsed(timestamp() - s.get_last_reset_time())
     { }
 
     stats_data operator + (const stats_data& a) const
diff --git a/include/stxxl/bits/io/linuxaio_file.h b/include/stxxl/bits/io/linuxaio_file.h
new file mode 100644
index 0000000..1aa1bfc
--- /dev/null
+++ b/include/stxxl/bits/io/linuxaio_file.h
@@ -0,0 +1,82 @@
+/***************************************************************************
+ *  include/stxxl/bits/io/linuxaio_file.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Johannes Singler <singler at kit.edu>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_IO_LINUXAIO_FILE_HEADER
+#define STXXL_IO_LINUXAIO_FILE_HEADER
+
+#include <stxxl/bits/config.h>
+
+#if STXXL_HAVE_LINUXAIO_FILE
+
+#include <stxxl/bits/io/ufs_file_base.h>
+#include <stxxl/bits/io/disk_queued_file.h>
+#include <stxxl/bits/io/linuxaio_queue.h>
+
+STXXL_BEGIN_NAMESPACE
+
+class linuxaio_queue;
+
+//! \addtogroup fileimpl
+//! \{
+
+//! Implementation of \c file based on the Linux kernel interface for
+//! asynchronous I/O
+class linuxaio_file : public ufs_file_base, public disk_queued_file
+{
+    friend class linuxaio_request;
+
+private:
+    int desired_queue_length;
+
+public:
+    //! Constructs file object
+    //! \param filename path of file
+    //! \param mode open mode, see \c stxxl::file::open_modes
+    //! \param queue_id disk queue identifier
+    //! \param allocator_id linked disk_allocator
+    //! \param device_id physical device identifier
+    //! \param desired_queue_length queue length requested from kernel
+    linuxaio_file(
+        const std::string& filename, int mode,
+        int queue_id = DEFAULT_LINUXAIO_QUEUE,
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID,
+        int desired_queue_length = 0)
+        : file(device_id),
+          ufs_file_base(filename, mode),
+          disk_queued_file(queue_id, allocator_id),
+          desired_queue_length(desired_queue_length)
+    { }
+
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
+    request_ptr aread(void* buffer, offset_type pos, size_type bytes,
+                      const completion_handler& on_cmpl = completion_handler());
+    request_ptr awrite(void* buffer, offset_type pos, size_type bytes,
+                       const completion_handler& on_cmpl = completion_handler());
+    const char * io_type() const;
+
+    int get_desired_queue_length() const
+    {
+        return desired_queue_length;
+    }
+};
+
+//! \}
+
+STXXL_END_NAMESPACE
+
+#endif // #if STXXL_HAVE_LINUXAIO_FILE
+
+#endif // !STXXL_IO_LINUXAIO_FILE_HEADER
+// vim: et:ts=4:sw=4
diff --git a/include/stxxl/bits/io/linuxaio_queue.h b/include/stxxl/bits/io/linuxaio_queue.h
new file mode 100644
index 0000000..aa8bcef
--- /dev/null
+++ b/include/stxxl/bits/io/linuxaio_queue.h
@@ -0,0 +1,100 @@
+/***************************************************************************
+ *  include/stxxl/bits/io/linuxaio_queue.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Johannes Singler <singler at kit.edu>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_IO_LINUXAIO_QUEUE_HEADER
+#define STXXL_IO_LINUXAIO_QUEUE_HEADER
+
+#include <stxxl/bits/io/linuxaio_file.h>
+
+#if STXXL_HAVE_LINUXAIO_FILE
+
+#include <linux/aio_abi.h>
+#include <list>
+
+#include <stxxl/bits/io/request_queue_impl_worker.h>
+#include <stxxl/bits/common/mutex.h>
+
+STXXL_BEGIN_NAMESPACE
+
+//! \addtogroup reqlayer
+//! \{
+
+//! Queue for linuxaio_file(s)
+//!
+//! Only one queue exists in a program, i.e. it is a singleton.
+class linuxaio_queue : public request_queue_impl_worker
+{
+    friend class linuxaio_request;
+
+    typedef linuxaio_queue self_type;
+
+private:
+    //! OS context
+    aio_context_t context;
+
+    //! storing linuxaio_request* would drop ownership
+    typedef std::list<request_ptr> queue_type;
+
+    // "waiting" requests have been submitted to this queue, but not yet to
+    // the OS; requests already submitted to the OS are "posted"
+    mutex waiting_mtx, posted_mtx;
+    queue_type waiting_requests, posted_requests;
+
+    //! max number of OS requests
+    int max_events;
+    //! number of requests in waiting_requests
+    semaphore num_waiting_requests, num_free_events, num_posted_requests;
+
+    // two threads, one for posting, one for waiting
+    thread_type post_thread, wait_thread;
+    state<thread_state> post_thread_state, wait_thread_state;
+
+    // Why do we need two threads, one for posting, and one for waiting?  Is
+    // one not enough?
+    // 1. A user call cannot invoke io_submit directly, since this sometimes
+    //    takes considerable time
+    // 2. A single thread cannot wait for the user program to post requests
+    //    and the OS to produce I/O completion events at the same time
+    //    (IOCB_CMD_NOOP does not seem to help here either)
+
+    static const priority_op _priority_op = WRITE;
+
+    static void * post_async(void* arg);   // thread start callback
+    static void * wait_async(void* arg);   // thread start callback
+    void post_requests();
+    void handle_events(io_event* events, long num_events, bool canceled);
+    void wait_requests();
+    void suspend();
+
+    // needed by linuxaio_request
+    aio_context_t get_io_context() { return context; }
+
+public:
+    //! Construct queue. Requests max number of requests simultaneously
+    //! submitted to disk, 0 means as many as possible
+    linuxaio_queue(int desired_queue_length = 0);
+
+    void add_request(request_ptr& req);
+    bool cancel_request(request_ptr& req);
+    void complete_request(request_ptr& req);
+    ~linuxaio_queue();
+};
+
+//! \}
+
+STXXL_END_NAMESPACE
+
+#endif // #if STXXL_HAVE_LINUXAIO_FILE
+
+#endif // !STXXL_IO_LINUXAIO_QUEUE_HEADER
+// vim: et:ts=4:sw=4
diff --git a/include/stxxl/bits/io/linuxaio_request.h b/include/stxxl/bits/io/linuxaio_request.h
new file mode 100644
index 0000000..6ee20e5
--- /dev/null
+++ b/include/stxxl/bits/io/linuxaio_request.h
@@ -0,0 +1,74 @@
+/***************************************************************************
+ *  include/stxxl/bits/io/linuxaio_request.h
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Johannes Singler <singler at kit.edu>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#ifndef STXXL_IO_LINUXAIO_REQUEST_HEADER
+#define STXXL_IO_LINUXAIO_REQUEST_HEADER
+
+#include <stxxl/bits/io/linuxaio_file.h>
+
+#if STXXL_HAVE_LINUXAIO_FILE
+
+#include <linux/aio_abi.h>
+#include <stxxl/bits/io/request_with_state.h>
+
+#define STXXL_VERBOSE_LINUXAIO(msg) STXXL_VERBOSE2(msg)
+
+STXXL_BEGIN_NAMESPACE
+
+//! \addtogroup reqlayer
+//! \{
+
+//! Request for a linuxaio_file.
+class linuxaio_request : public request_with_state
+{
+    template <class base_file_type>
+    friend class fileperblock_file;
+
+    //! control block (iocb, see <linux/aio_abi.h>) of the async request
+    iocb cb;
+
+    void fill_control_block();   // presumably fills in cb; defined outside this header -- TODO confirm
+
+public:
+    linuxaio_request(
+        const completion_handler& on_cmpl,
+        file* file,
+        void* buffer,
+        offset_type offset,
+        size_type bytes,
+        request_type type)
+        : request_with_state(on_cmpl, file, buffer, offset, bytes, type)   // all arguments forwarded unchanged
+    {
+        assert(dynamic_cast<linuxaio_file*>(file));   // file must actually be a linuxaio_file
+        STXXL_VERBOSE_LINUXAIO("linuxaio_request[" << this << "]" <<
+                               " linuxaio_request" <<
+                               "(file=" << file << " buffer=" << buffer <<
+                               " offset=" << offset << " bytes=" << bytes <<
+                               " type=" << type << ")");
+    }
+
+    bool post();         // declared only; implemented outside this header
+    bool cancel();       // declared only; implemented outside this header
+    bool cancel_aio();   // declared only; implemented outside this header
+    void completed(bool posted, bool canceled);
+    void completed(bool canceled) { completed(true, canceled); }   // delegates with posted = true
+};
+
+//! \}
+
+STXXL_END_NAMESPACE
+
+#endif // #if STXXL_HAVE_LINUXAIO_FILE
+
+#endif // !STXXL_IO_LINUXAIO_REQUEST_HEADER
+// vim: et:ts=4:sw=4
diff --git a/include/stxxl/bits/io/mem_file.h b/include/stxxl/bits/io/mem_file.h
index a9949d9..95a72dc 100644
--- a/include/stxxl/bits/io/mem_file.h
+++ b/include/stxxl/bits/io/mem_file.h
@@ -5,6 +5,7 @@
  *
  *  Copyright (C) 2008 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
  *  Copyright (C) 2009 Johannes Singler <singler at ira.uka.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -17,7 +18,6 @@
 #include <stxxl/bits/io/disk_queued_file.h>
 #include <stxxl/bits/io/request.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -26,17 +26,27 @@ STXXL_BEGIN_NAMESPACE
 //! Implementation of file based on new[] and memcpy.
 class mem_file : public disk_queued_file
 {
-    char* ptr;
-    offset_type sz;
+    //! pointer to memory area of "file"
+    char* m_ptr;
+
+    //! size of memory area
+    offset_type m_size;
 
-    mutex m_mutex;      // sequentialize function calls
+    //! sequentialize function calls
+    mutex m_mutex;
 
 public:
     //! constructs file object.
     mem_file(
-        int queue_id = DEFAULT_QUEUE, int allocator_id = NO_ALLOCATOR) : disk_queued_file(queue_id, allocator_id), ptr(NULL), sz(0)
+        int queue_id = DEFAULT_QUEUE,
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID)
+        : file(device_id),
+          disk_queued_file(queue_id, allocator_id),
+          m_ptr(NULL), m_size(0)
     { }
-    void serve(const request* req) throw (io_error);
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
     ~mem_file();
     offset_type size();
     void set_size(offset_type newsize);
diff --git a/include/stxxl/bits/io/mmap_file.h b/include/stxxl/bits/io/mmap_file.h
index 6cfb98a..04bdfda 100644
--- a/include/stxxl/bits/io/mmap_file.h
+++ b/include/stxxl/bits/io/mmap_file.h
@@ -22,7 +22,6 @@
 #include <stxxl/bits/io/ufs_file_base.h>
 #include <stxxl/bits/io/disk_queued_file.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -37,10 +36,19 @@ public:
     //! \param mode open mode, see \c stxxl::file::open_modes
     //! \param queue_id disk queue identifier
     //! \param allocator_id linked disk_allocator
-    inline mmap_file(const std::string& filename, int mode, int queue_id = DEFAULT_QUEUE, int allocator_id = NO_ALLOCATOR) :
-        ufs_file_base(filename, mode), disk_queued_file(queue_id, allocator_id)
+    //! \param device_id physical device identifier
+    inline mmap_file(
+        const std::string& filename,
+        int mode,
+        int queue_id = DEFAULT_QUEUE,
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID)
+        : file(device_id),
+          ufs_file_base(filename, mode),
+          disk_queued_file(queue_id, allocator_id)
     { }
-    void serve(const request* req) throw (io_error);
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
     const char * io_type() const;
 };
 
diff --git a/include/stxxl/bits/io/request.h b/include/stxxl/bits/io/request.h
index 7a37418..0dbee44 100644
--- a/include/stxxl/bits/io/request.h
+++ b/include/stxxl/bits/io/request.h
@@ -5,7 +5,7 @@
  *
  *  Copyright (C) 2002 Roman Dementiev <dementiev at mpi-sb.mpg.de>
  *  Copyright (C) 2008 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
- *  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+ *  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -25,47 +25,46 @@
 #include <stxxl/bits/compat/unique_ptr.h>
 #include <stxxl/bits/verbose.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
-#define BLOCK_ALIGN 4096
+#define STXXL_BLOCK_ALIGN 4096
 
 class file;
 
-//! Request with basic properties like file and offset.
+//! Request object encapsulating basic properties like file and offset.
 class request : virtual public request_interface, public atomic_counted_object
 {
-protected:
-    completion_handler on_complete;
-    compat_unique_ptr<stxxl::io_error>::result error;
+    friend class linuxaio_queue;
 
 protected:
-    file* file_;
-    void* buffer;
-    offset_type offset;
-    size_type bytes;
-    request_type type;
+    completion_handler m_on_complete;
+    compat_unique_ptr<stxxl::io_error>::result m_error;
 
-    void completed();
+protected:
+    file* m_file;
+    void* m_buffer;
+    offset_type m_offset;
+    size_type m_bytes;
+    request_type m_type;
 
 public:
     request(const completion_handler& on_compl,
-            file* file__,
-            void* buffer_,
-            offset_type offset_,
-            size_type bytes_,
-            request_type type_);
+            file* file,
+            void* buffer,
+            offset_type offset,
+            size_type bytes,
+            request_type type);
 
     virtual ~request();
 
-    file * get_file() const { return file_; }
-    void * get_buffer() const { return buffer; }
-    offset_type get_offset() const { return offset; }
-    size_type get_size() const { return bytes; }
-    request_type get_type() const { return type; }
+    file * get_file() const { return m_file; }
+    void * get_buffer() const { return m_buffer; }
+    offset_type get_offset() const { return m_offset; }
+    size_type get_size() const { return m_bytes; }
+    request_type get_type() const { return m_type; }
 
     void check_alignment() const;
 
@@ -75,23 +74,25 @@ public:
     //! execution.
     void error_occured(const char* msg)
     {
-        error.reset(new stxxl::io_error(msg));
+        m_error.reset(new stxxl::io_error(msg));
     }
 
     //! Inform the request object that an error occurred during the I/O
     //! execution.
     void error_occured(const std::string& msg)
     {
-        error.reset(new stxxl::io_error(msg));
+        m_error.reset(new stxxl::io_error(msg));
     }
 
     //! Rises an exception if there were error with the I/O.
-    void check_errors() throw (stxxl::io_error)
+    void check_errors()
     {
-        if (error.get())
-            throw *(error.get());
+        if (m_error.get())
+            throw *(m_error.get());
     }
 
+    virtual const char * io_type() const;
+
 protected:
     void check_nref(bool after = false)
     {
diff --git a/include/stxxl/bits/io/request_interface.h b/include/stxxl/bits/io/request_interface.h
index b4d4cb7..ca94bef 100644
--- a/include/stxxl/bits/io/request_interface.h
+++ b/include/stxxl/bits/io/request_interface.h
@@ -21,10 +21,9 @@
 #include <stxxl/bits/noncopyable.h>
 #include <stxxl/bits/common/types.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
 class onoff_switch;
@@ -48,12 +47,8 @@ public:
 protected:
     virtual void notify_waiters() = 0;
 
-public:
-    // HACK!
-    virtual void serve() = 0;
-
 protected:
-    virtual void completed() = 0;
+    virtual void completed(bool canceled) = 0;
 
 public:
     //! Suspends calling thread until completion of the request.
diff --git a/include/stxxl/bits/io/request_operations.h b/include/stxxl/bits/io/request_operations.h
index 58501a5..4d04828 100644
--- a/include/stxxl/bits/io/request_operations.h
+++ b/include/stxxl/bits/io/request_operations.h
@@ -20,20 +20,18 @@
 #include <stxxl/bits/io/iostats.h>
 #include <stxxl/bits/common/onoff_switch.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
 //! Collection of functions to track statuses of a number of requests.
 
-
 //! Suspends calling thread until \b all given requests are completed.
 //! \param reqs_begin begin of request sequence to wait for
 //! \param reqs_end end of request sequence to wait for
-template <class request_iterator_>
-void wait_all(request_iterator_ reqs_begin, request_iterator_ reqs_end)
+template <class RequestIterator>
+void wait_all(RequestIterator reqs_begin, RequestIterator reqs_end)
 {
     for ( ; reqs_begin != reqs_end; ++reqs_begin)
         (request_ptr(*reqs_begin))->wait();
@@ -55,10 +53,11 @@ inline void wait_all(request_ptr req_array[], size_t count)
 //! \param reqs_begin begin of request sequence
 //! \param reqs_end end of request sequence
 //! \return number of request canceled
-template <class request_iterator_>
-typename std::iterator_traits<request_iterator_>::difference_type cancel_all(request_iterator_ reqs_begin, request_iterator_ reqs_end)
+template <class RequestIterator>
+typename std::iterator_traits<RequestIterator>::difference_type
+cancel_all(RequestIterator reqs_begin, RequestIterator reqs_end)
 {
-    typename std::iterator_traits<request_iterator_>::difference_type num_canceled = 0;
+    typename std::iterator_traits<RequestIterator>::difference_type num_canceled = 0;
     while (reqs_begin != reqs_end)
     {
         if ((request_ptr(*reqs_begin))->cancel())
@@ -72,8 +71,8 @@ typename std::iterator_traits<request_iterator_>::difference_type cancel_all(req
 //! \param reqs_begin begin of request sequence to poll
 //! \param reqs_end end of request sequence to poll
 //! \return \c true if any of requests is completed, then index contains valid value, otherwise \c false
-template <class request_iterator_>
-request_iterator_ poll_any(request_iterator_ reqs_begin, request_iterator_ reqs_end)
+template <class RequestIterator>
+RequestIterator poll_any(RequestIterator reqs_begin, RequestIterator reqs_end)
 {
     while (reqs_begin != reqs_end)
     {
@@ -85,7 +84,6 @@ request_iterator_ poll_any(request_iterator_ reqs_begin, request_iterator_ reqs_
     return reqs_end;
 }
 
-
 //! Polls requests.
 //! \param req_array array of request_ptr objects
 //! \param count size of req_array
@@ -98,19 +96,18 @@ inline bool poll_any(request_ptr req_array[], size_t count, size_t& index)
     return res != (req_array + count);
 }
 
-
 //! Suspends calling thread until \b any of requests is completed.
 //! \param reqs_begin begin of request sequence to wait for
 //! \param reqs_end end of request sequence to wait for
 //! \return index in req_array pointing to the \b first completed request
-template <class request_iterator_>
-request_iterator_ wait_any(request_iterator_ reqs_begin, request_iterator_ reqs_end)
+template <class RequestIterator>
+RequestIterator wait_any(RequestIterator reqs_begin, RequestIterator reqs_end)
 {
     stats::scoped_wait_timer wait_timer(stats::WAIT_OP_ANY);
 
     onoff_switch sw;
 
-    request_iterator_ cur = reqs_begin, result = reqs_end;
+    RequestIterator cur = reqs_begin, result = reqs_end;
 
     for ( ; cur != reqs_end; cur++)
     {
@@ -145,7 +142,6 @@ request_iterator_ wait_any(request_iterator_ reqs_begin, request_iterator_ reqs_
     return result;
 }
 
-
 //! Suspends calling thread until \b any of requests is completed.
 //! \param req_array array of \c request_ptr objects
 //! \param count size of req_array
diff --git a/include/stxxl/bits/io/request_queue.h b/include/stxxl/bits/io/request_queue.h
index 1c65053..a6bdaa4 100644
--- a/include/stxxl/bits/io/request_queue.h
+++ b/include/stxxl/bits/io/request_queue.h
@@ -17,12 +17,12 @@
 #include <stxxl/bits/noncopyable.h>
 #include <stxxl/bits/io/request.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
+//! Interface of a request_queue to which requests can be added and canceled.
 class request_queue : private noncopyable
 {
 public:
diff --git a/include/stxxl/bits/io/request_queue_impl_1q.h b/include/stxxl/bits/io/request_queue_impl_1q.h
index a8c3660..f1854a4 100644
--- a/include/stxxl/bits/io/request_queue_impl_1q.h
+++ b/include/stxxl/bits/io/request_queue_impl_1q.h
@@ -20,26 +20,27 @@
 #include <stxxl/bits/io/request_queue_impl_worker.h>
 #include <stxxl/bits/common/mutex.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
+//! Implementation of a local request queue having only one queue for both read
+//! and write requests, thus having only one thread.
 class request_queue_impl_1q : public request_queue_impl_worker
 {
 private:
     typedef request_queue_impl_1q self;
     typedef std::list<request_ptr> queue_type;
 
-    mutex queue_mutex;
-    queue_type queue;
+    mutex m_queue_mutex;
+    queue_type m_queue;
 
     state<thread_state> m_thread_state;
-    thread_type thread;
-    semaphore sem;
+    thread_type m_thread;
+    semaphore m_sem;
 
-    static const priority_op _priority_op = WRITE;
+    static const priority_op m_priority_op = WRITE;
 
     static void * worker(void* arg);
 
diff --git a/include/stxxl/bits/io/request_queue_impl_qwqr.h b/include/stxxl/bits/io/request_queue_impl_qwqr.h
index f3b8408..2ab9cd9 100644
--- a/include/stxxl/bits/io/request_queue_impl_qwqr.h
+++ b/include/stxxl/bits/io/request_queue_impl_qwqr.h
@@ -21,28 +21,30 @@
 #include <stxxl/bits/io/request_queue_impl_worker.h>
 #include <stxxl/bits/common/mutex.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
+//! Implementation of a local request queue having two queues, one for read and
+//! one for write requests, thus having two threads. This is the default
+//! implementation.
 class request_queue_impl_qwqr : public request_queue_impl_worker
 {
 private:
     typedef request_queue_impl_qwqr self;
     typedef std::list<request_ptr> queue_type;
 
-    mutex write_mutex;
-    mutex read_mutex;
-    queue_type write_queue;
-    queue_type read_queue;
+    mutex m_write_mutex;
+    mutex m_read_mutex;
+    queue_type m_write_queue;
+    queue_type m_read_queue;
 
     state<thread_state> m_thread_state;
-    thread_type thread;
-    semaphore sem;
+    thread_type m_thread;
+    semaphore m_sem;
 
-    static const priority_op _priority_op = WRITE;
+    static const priority_op m_priority_op = WRITE;
 
     static void * worker(void* arg);
 
diff --git a/include/stxxl/bits/io/request_queue_impl_worker.h b/include/stxxl/bits/io/request_queue_impl_worker.h
index 5a2f0da..1489c0c 100644
--- a/include/stxxl/bits/io/request_queue_impl_worker.h
+++ b/include/stxxl/bits/io/request_queue_impl_worker.h
@@ -32,12 +32,14 @@
 #include <stxxl/bits/common/semaphore.h>
 #include <stxxl/bits/common/state.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
+//! Implementation of request queue worker threads. Worker threads can be
+//! started by start_thread and stopped with stop_thread. The queue state is
+//! checked before termination and updated afterwards.
 class request_queue_impl_worker : public request_queue
 {
 protected:
diff --git a/include/stxxl/bits/io/request_with_state.h b/include/stxxl/bits/io/request_with_state.h
index 9516baf..0b6f78f 100644
--- a/include/stxxl/bits/io/request_with_state.h
+++ b/include/stxxl/bits/io/request_with_state.h
@@ -19,21 +19,20 @@
 #include <stxxl/bits/io/request_with_waiters.h>
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup fileimpl
+//! \addtogroup reqlayer
 //! \{
 
 //! Request with completion state.
-class request_with_state : public request, public request_with_waiters
+class request_with_state : public request_with_waiters
 {
 protected:
     //! states of request
     //! OP - operating, DONE - request served, READY2DIE - can be destroyed
     enum request_state { OP = 0, DONE = 1, READY2DIE = 2 };
 
-    state<request_state> _state;
+    state<request_state> m_state;
 
 protected:
     request_with_state(
@@ -42,9 +41,9 @@ protected:
         void* buf,
         offset_type off,
         size_type b,
-        request_type t) :
-        request(on_cmpl, f, buf, off, b, t),
-        _state(OP)
+        request_type t)
+        : request_with_waiters(on_cmpl, f, buf, off, b, t),
+          m_state(OP)
     { }
 
 public:
@@ -52,6 +51,9 @@ public:
     void wait(bool measure_time = true);
     bool poll();
     bool cancel();
+
+protected:
+    void completed(bool canceled);
 };
 
 //! \}
diff --git a/include/stxxl/bits/io/request_with_waiters.h b/include/stxxl/bits/io/request_with_waiters.h
index 081491d..56f284e 100644
--- a/include/stxxl/bits/io/request_with_waiters.h
+++ b/include/stxxl/bits/io/request_with_waiters.h
@@ -18,28 +18,38 @@
 
 #include <stxxl/bits/common/mutex.h>
 #include <stxxl/bits/common/onoff_switch.h>
-#include <stxxl/bits/io/request_interface.h>
+#include <stxxl/bits/io/request.h>
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup fileimpl
+//! \addtogroup reqlayer
 //! \{
 
 //! Request that is aware of threads waiting for it to complete.
-class request_with_waiters : virtual public request_interface
+class request_with_waiters : public request
 {
-    mutex waiters_mutex;
-    std::set<onoff_switch*> waiters;
+    mutex m_waiters_mutex;
+    std::set<onoff_switch*> m_waiters;
 
 protected:
     bool add_waiter(onoff_switch* sw);
     void delete_waiter(onoff_switch* sw);
     void notify_waiters();
-    /*
-    int nwaiters();             // returns number of waiters
-    */
+
+    //! returns number of waiters
+    size_t num_waiters();
+
+public:
+    request_with_waiters(
+        const completion_handler& on_cmpl,
+        file* f,
+        void* buf,
+        offset_type off,
+        size_type b,
+        request_type t)
+        : request(on_cmpl, f, buf, off, b, t)
+    { }
 };
 
 //! \}
diff --git a/include/stxxl/bits/io/serving_request.h b/include/stxxl/bits/io/serving_request.h
index 7ec7b5b..f0bfcc9 100644
--- a/include/stxxl/bits/io/serving_request.h
+++ b/include/stxxl/bits/io/serving_request.h
@@ -16,10 +16,9 @@
 
 #include <stxxl/bits/io/request_with_state.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-//! \addtogroup iolayer
+//! \addtogroup reqlayer
 //! \{
 
 //! Request which serves an I/O by calling the synchronous routine of the file.
@@ -27,6 +26,8 @@ class serving_request : public request_with_state
 {
     template <class base_file_type>
     friend class fileperblock_file;
+    friend class request_queue_impl_qwqr;
+    friend class request_queue_impl_1q;
 
 public:
     serving_request(
@@ -38,8 +39,7 @@ public:
         request_type t);
 
 protected:
-    void serve();
-    void completed();
+    virtual void serve();
 
 public:
     const char * io_type() const;
diff --git a/include/stxxl/bits/io/simdisk_file.h b/include/stxxl/bits/io/simdisk_file.h
index 3571196..b9ddc50 100644
--- a/include/stxxl/bits/io/simdisk_file.h
+++ b/include/stxxl/bits/io/simdisk_file.h
@@ -30,14 +30,11 @@
 #include <stxxl/bits/io/ufs_file_base.h>
 #include <stxxl/bits/io/disk_queued_file.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \weakgroup fileimpl
 //! \{
 
- #define AVERAGE_SPEED (15 * 1024 * 1024)
-
 class simdisk_geometry : private noncopyable
 {
     struct Zone
@@ -61,14 +58,15 @@ class simdisk_geometry : private noncopyable
 #endif
             int _first_sector,
             int _sectors,
-            double _rate) :
+            double _rate)
+            :
 #if 0
-            last_cyl(_last_cyl),
-            sect_per_track(_sect_per_track),
+              last_cyl(_last_cyl),
+              sect_per_track(_sect_per_track),
 #endif
-            first_sector(_first_sector),
-            sectors(_sectors),
-            sustained_data_rate(_rate)
+              first_sector(_first_sector),
+              sectors(_sectors),
+              sustained_data_rate(_rate)
         { }
     };
     struct ZoneCmp
@@ -101,8 +99,9 @@ public:
 
     inline ~simdisk_geometry()
     { }
-};
 
+    static const double s_average_speed;
+};
 
 class IC35L080AVVA07 : public simdisk_geometry              // IBM series 120GXP
 {
@@ -121,13 +120,23 @@ public:
     //! \param mode open mode, see \c stxxl::file::open_modes
     //! \param queue_id disk queue identifier
     //! \param allocator_id linked disk_allocator
-    inline sim_disk_file(const std::string& filename, int mode, int queue_id = DEFAULT_QUEUE, int allocator_id = NO_ALLOCATOR) : ufs_file_base(filename, mode), disk_queued_file(queue_id, allocator_id)
+    //! \param device_id physical device identifier
+    inline sim_disk_file(
+        const std::string& filename,
+        int mode,
+        int queue_id = DEFAULT_QUEUE,
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID)
+        : file(device_id),
+          ufs_file_base(filename, mode),
+          disk_queued_file(queue_id, allocator_id)
     {
         std::cout << "Please, make sure that '" << filename <<
             "' is resided on swap memory partition!" <<
             std::endl;
     }
-    void serve(const request* req) throw (io_error);
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
     void set_size(offset_type newsize);
     const char * io_type() const;
 };
diff --git a/include/stxxl/bits/io/syscall_file.h b/include/stxxl/bits/io/syscall_file.h
index 3868d36..9dd9c8b 100644
--- a/include/stxxl/bits/io/syscall_file.h
+++ b/include/stxxl/bits/io/syscall_file.h
@@ -6,6 +6,7 @@
  *  Copyright (C) 2002 Roman Dementiev <dementiev at mpi-sb.mpg.de>
  *  Copyright (C) 2008 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
  *  Copyright (C) 2009 Johannes Singler <singler at ira.uka.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -18,7 +19,6 @@
 #include <stxxl/bits/io/ufs_file_base.h>
 #include <stxxl/bits/io/disk_queued_file.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -33,14 +33,19 @@ public:
     //! \param mode open mode, see \c stxxl::file::open_modes
     //! \param queue_id disk queue identifier
     //! \param allocator_id linked disk_allocator
+    //! \param device_id physical device identifier
     syscall_file(
         const std::string& filename,
         int mode,
         int queue_id = DEFAULT_QUEUE,
-        int allocator_id = NO_ALLOCATOR)
-        : ufs_file_base(filename, mode), disk_queued_file(queue_id, allocator_id)
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID)
+        : file(device_id),
+          ufs_file_base(filename, mode),
+          disk_queued_file(queue_id, allocator_id)
     { }
-    void serve(const request* req) throw (io_error);
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
     const char * io_type() const;
 };
 
diff --git a/include/stxxl/bits/io/ufs_file_base.h b/include/stxxl/bits/io/ufs_file_base.h
index d4fc337..962d5dd 100644
--- a/include/stxxl/bits/io/ufs_file_base.h
+++ b/include/stxxl/bits/io/ufs_file_base.h
@@ -24,7 +24,6 @@
 
 #include <string>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
diff --git a/include/stxxl/bits/io/wbtl_file.h b/include/stxxl/bits/io/wbtl_file.h
index 067b94f..933a216 100644
--- a/include/stxxl/bits/io/wbtl_file.h
+++ b/include/stxxl/bits/io/wbtl_file.h
@@ -26,7 +26,6 @@
 
 #include <stxxl/bits/io/disk_queued_file.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -89,7 +88,8 @@ public:
     offset_type size();
     void set_size(offset_type newsize);
     void lock();
-    void serve(const request* req) throw (io_error);
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
     void discard(offset_type offset, offset_type size);
     const char * io_type() const;
 
diff --git a/include/stxxl/bits/io/wfs_file_base.h b/include/stxxl/bits/io/wfs_file_base.h
index ef70056..483e640 100644
--- a/include/stxxl/bits/io/wfs_file_base.h
+++ b/include/stxxl/bits/io/wfs_file_base.h
@@ -24,7 +24,6 @@
 #include <stxxl/bits/io/file.h>
 #include <stxxl/bits/io/request.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
diff --git a/include/stxxl/bits/io/wincall_file.h b/include/stxxl/bits/io/wincall_file.h
index 8d9df21..e2097af 100644
--- a/include/stxxl/bits/io/wincall_file.h
+++ b/include/stxxl/bits/io/wincall_file.h
@@ -5,7 +5,7 @@
  *
  *  Copyright (C) 2005-2006 Roman Dementiev <dementiev at ira.uka.de>
  *  Copyright (C) 2008 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
- *  Copyright (C) 2009, 2010 Johannes Singler <singler at kit.edu>
+ *  Copyright (C) 2009-2010 Johannes Singler <singler at kit.edu>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -30,7 +30,6 @@
 #include <stxxl/bits/io/wfs_file_base.h>
 #include <stxxl/bits/io/disk_queued_file.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup fileimpl
@@ -45,13 +44,19 @@ public:
     //! \param mode open mode, see \c stxxl::file::open_modes
     //! \param queue_id disk queue identifier
     //! \param allocator_id linked disk_allocator
+    //! \param device_id physical device identifier
     wincall_file(
         const std::string& filename,
         int mode,
-        int queue_id = DEFAULT_QUEUE, int allocator_id = NO_ALLOCATOR)
-        : wfs_file_base(filename, mode), disk_queued_file(queue_id, allocator_id)
+        int queue_id = DEFAULT_QUEUE,
+        int allocator_id = NO_ALLOCATOR,
+        unsigned int device_id = DEFAULT_DEVICE_ID)
+        : file(device_id),
+          wfs_file_base(filename, mode),
+          disk_queued_file(queue_id, allocator_id)
     { }
-    void serve(const request* req) throw (io_error);
+    void serve(void* buffer, offset_type offset, size_type bytes,
+               request::request_type type);
     const char * io_type() const;
 };
 
diff --git a/include/stxxl/bits/mng/adaptor.h b/include/stxxl/bits/mng/adaptor.h
index 27d9b49..2f6bab6 100644
--- a/include/stxxl/bits/mng/adaptor.h
+++ b/include/stxxl/bits/mng/adaptor.h
@@ -19,14 +19,12 @@
 
 #include <stxxl/bits/common/types.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup mnglayer
 //!
 //! \{
 
-
 template <unsigned_type modulo>
 class blocked_index
 {
@@ -136,86 +134,89 @@ public:
     }
 };
 
-#define STXXL_ADAPTOR_ARITHMETICS(pos)         \
-    bool operator == (const _Self& a) const    \
-    {                                          \
-        return (a.pos == pos);                 \
-    }                                          \
-    bool operator != (const _Self& a) const    \
-    {                                          \
-        return (a.pos != pos);                 \
-    }                                          \
-    bool operator < (const _Self& a) const     \
-    {                                          \
-        return (pos < a.pos);                  \
-    }                                          \
-    bool operator > (const _Self& a) const     \
-    {                                          \
-        return (pos > a.pos);                  \
-    }                                          \
-    bool operator <= (const _Self& a) const    \
-    {                                          \
-        return (pos <= a.pos);                 \
-    }                                          \
-    bool operator >= (const _Self& a) const    \
-    {                                          \
-        return (pos >= a.pos);                 \
-    }                                          \
-    _Self operator + (pos_type off) const      \
-    {                                          \
-        return _Self(array, pos + off);        \
-    }                                          \
-    _Self operator - (pos_type off) const      \
-    {                                          \
-        return _Self(array, pos - off);        \
-    }                                          \
-    _Self& operator ++ ()                      \
-    {                                          \
-        pos++;                                 \
-        return *this;                          \
-    }                                          \
-    _Self operator ++ (int)                    \
-    {                                          \
-        _Self tmp = *this;                     \
-        pos++;                                 \
-        return tmp;                            \
-    }                                          \
-    _Self& operator -- ()                      \
-    {                                          \
-        pos--;                                 \
-        return *this;                          \
-    }                                          \
-    _Self operator -- (int)                    \
-    {                                          \
-        _Self tmp = *this;                     \
-        pos--;                                 \
-        return tmp;                            \
-    }                                          \
-    pos_type operator - (const _Self& a) const \
-    {                                          \
-        return pos - a.pos;                    \
-    }                                          \
-    _Self& operator -= (pos_type off)          \
-    {                                          \
-        pos -= off;                            \
-        return *this;                          \
-    }                                          \
-    _Self& operator += (pos_type off)          \
-    {                                          \
-        pos += off;                            \
-        return *this;                          \
-    }
-
-template <class one_dim_array_type, class data_type, class pos_type>
+#define STXXL_ADAPTOR_ARITHMETICS(pos)             \
+    bool operator == (const self_type& a) const    \
+    {                                              \
+        return (a.pos == pos);                     \
+    }                                              \
+    bool operator != (const self_type& a) const    \
+    {                                              \
+        return (a.pos != pos);                     \
+    }                                              \
+    bool operator < (const self_type& a) const     \
+    {                                              \
+        return (pos < a.pos);                      \
+    }                                              \
+    bool operator > (const self_type& a) const     \
+    {                                              \
+        return (pos > a.pos);                      \
+    }                                              \
+    bool operator <= (const self_type& a) const    \
+    {                                              \
+        return (pos <= a.pos);                     \
+    }                                              \
+    bool operator >= (const self_type& a) const    \
+    {                                              \
+        return (pos >= a.pos);                     \
+    }                                              \
+    self_type operator + (pos_type off) const      \
+    {                                              \
+        return self_type(array, pos + off);        \
+    }                                              \
+    self_type operator - (pos_type off) const      \
+    {                                              \
+        return self_type(array, pos - off);        \
+    }                                              \
+    self_type& operator ++ ()                      \
+    {                                              \
+        pos++;                                     \
+        return *this;                              \
+    }                                              \
+    self_type operator ++ (int)                    \
+    {                                              \
+        self_type tmp = *this;                     \
+        pos++;                                     \
+        return tmp;                                \
+    }                                              \
+    self_type& operator -- ()                      \
+    {                                              \
+        pos--;                                     \
+        return *this;                              \
+    }                                              \
+    self_type operator -- (int)                    \
+    {                                              \
+        self_type tmp = *this;                     \
+        pos--;                                     \
+        return tmp;                                \
+    }                                              \
+    pos_type operator - (const self_type& a) const \
+    {                                              \
+        return pos - a.pos;                        \
+    }                                              \
+    self_type& operator -= (pos_type off)          \
+    {                                              \
+        pos -= off;                                \
+        return *this;                              \
+    }                                              \
+    self_type& operator += (pos_type off)          \
+    {                                              \
+        pos += off;                                \
+        return *this;                              \
+    }
+
+template <class OneDimArrayType, class DataType, class PosType>
 struct two2one_dim_array_adapter_base
-    : public std::iterator<std::random_access_iterator_tag, data_type, unsigned_type>
+    : public std::iterator<std::random_access_iterator_tag, DataType, unsigned_type>
 {
-    one_dim_array_type* array;
-    pos_type pos;
-    typedef pos_type _pos_type;
+    typedef OneDimArrayType one_dim_array_type;
+    typedef DataType data_type;
+    typedef PosType pos_type;
+
     typedef two2one_dim_array_adapter_base<one_dim_array_type,
-                                           data_type, pos_type> _Self;
+                                           data_type, pos_type> self_type;
 
+    one_dim_array_type* array;
+    pos_type pos;
 
     two2one_dim_array_adapter_base()
     { }
@@ -230,102 +231,105 @@ struct two2one_dim_array_adapter_base
     STXXL_ADAPTOR_ARITHMETICS(pos)
 };
 
-
 //////////////////////////////
 
-#define BLOCK_ADAPTOR_OPERATORS(two_to_one_dim_array_adaptor_base)                                      \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& operator ++ (             \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a)                           \
-    {                                                                                                   \
-        a.pos++;                                                                                        \
-        return a;                                                                                       \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type> operator ++ (              \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a, int)                      \
-    {                                                                                                   \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type> tmp = a;                      \
-        a.pos++;                                                                                        \
-        return tmp;                                                                                     \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& operator -- (             \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a)                           \
-    {                                                                                                   \
-        a.pos--;                                                                                        \
-        return a;                                                                                       \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type> operator -- (              \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a, int)                      \
-    {                                                                                                   \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type> tmp = a;                      \
-        a.pos--;                                                                                        \
-        return tmp;                                                                                     \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& operator -= (             \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a,                           \
-        typename two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>::_pos_type off)      \
-    {                                                                                                   \
-        a.pos -= off;                                                                                   \
-        return a;                                                                                       \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& operator += (             \
-        two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a,                           \
-        typename two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>::_pos_type off)      \
-    {                                                                                                   \
-        a.pos += off;                                                                                   \
-        return a;                                                                                       \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type> operator + (               \
-        const two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a,                     \
-        typename two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>::_pos_type off)      \
-    {                                                                                                   \
-        return two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>(a.array, a.pos + off); \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type> operator + (               \
-        typename two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>::_pos_type off,      \
-        const two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a)                     \
-    {                                                                                                   \
-        return two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>(a.array, a.pos + off); \
-    }                                                                                                   \
-                                                                                                        \
-    template <unsigned _blk_sz, typename _run_type, class __pos_type>                                   \
-    inline two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type> operator - (               \
-        const two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>& a,                     \
-        typename two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>::_pos_type off)      \
-    {                                                                                                   \
-        return two_to_one_dim_array_adaptor_base<_blk_sz, _run_type, __pos_type>(a.array, a.pos - off); \
+#define BLOCK_ADAPTOR_OPERATORS(two_to_one_dim_array_adaptor_base)                                   \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& operator ++ (             \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a)                           \
+    {                                                                                                \
+        a.pos++;                                                                                     \
+        return a;                                                                                    \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType> operator ++ (              \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a, int)                      \
+    {                                                                                                \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType> tmp = a;                      \
+        a.pos++;                                                                                     \
+        return tmp;                                                                                  \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& operator -- (             \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a)                           \
+    {                                                                                                \
+        a.pos--;                                                                                     \
+        return a;                                                                                    \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType> operator -- (              \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a, int)                      \
+    {                                                                                                \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType> tmp = a;                      \
+        a.pos--;                                                                                     \
+        return tmp;                                                                                  \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& operator -= (             \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a,                           \
+        typename two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>::_pos_type off)      \
+    {                                                                                                \
+        a.pos -= off;                                                                                \
+        return a;                                                                                    \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& operator += (             \
+        two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a,                           \
+        typename two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>::_pos_type off)      \
+    {                                                                                                \
+        a.pos += off;                                                                                \
+        return a;                                                                                    \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType> operator + (               \
+        const two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a,                     \
+        typename two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>::_pos_type off)      \
+    {                                                                                                \
+        return two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>(a.array, a.pos + off); \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType> operator + (               \
+        typename two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>::_pos_type off,      \
+        const two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a)                     \
+    {                                                                                                \
+        return two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>(a.array, a.pos + off); \
+    }                                                                                                \
+                                                                                                     \
+    template <unsigned BlockSize, typename RunType, class PosType>                                   \
+    inline two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType> operator - (               \
+        const two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>& a,                     \
+        typename two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>::_pos_type off)      \
+    {                                                                                                \
+        return two_to_one_dim_array_adaptor_base<BlockSize, RunType, PosType>(a.array, a.pos - off); \
     }
 
-
 #if 0
 //////////////////////////
-template <class one_dim_array_type, class data_type,
-          unsigned dim_size, class pos_type = blocked_index<dim_size> >
-struct two2one_dim_array_row_adapter :
-    public two2one_dim_array_adapter_base<one_dim_array_type, data_type, pos_type>
+template <class OneDimArrayType, class DataType,
+          unsigned DimSize, class PosType = blocked_index<DimSize> >
+struct two2one_dim_array_row_adapter
+    : public two2one_dim_array_adapter_base<OneDimArrayType, DataType, PosType>
 {
+    typedef OneDimArrayType one_dim_array_type;
+    typedef DataType data_type;
+    typedef DimSize dim_type;
+    typedef PosType pos_type;
+
     typedef two2one_dim_array_row_adapter<one_dim_array_type,
-                                          data_type, dim_size, pos_type> _Self;
+                                          data_type, dim_size, pos_type> self_type;
 
     typedef two2one_dim_array_adapter_base<one_dim_array_type,
-                                           data_type, pos_type> _Parent;
-    using _Parent::array;
-    using _Parent::pos;
+                                           data_type, pos_type> base_type;
+    using base_type::array;
+    using base_type::pos;
 
     two2one_dim_array_row_adapter()
     { }
@@ -360,13 +364,13 @@ struct two2one_dim_array_row_adapter :
     STXXL_ADAPTOR_ARITHMETICS(pos)
 };
 
-template <class one_dim_array_type, class data_type,
-          unsigned dim_size, class pos_type = blocked_index<dim_size> >
+template <class OneDimArrayType, class DataType,
+          unsigned DimSize, class PosType = blocked_index<DimSize> >
 struct two2one_dim_array_column_adapter
-    : public two2one_dim_array_adapter_base<one_dim_array_type, data_type, pos_type>
+    : public two2one_dim_array_adapter_base<OneDimArrayType, DataType, PosType>
 {
     typedef two2one_dim_array_column_adapter<one_dim_array_type,
-                                             data_type, dim_size, pos_type> _Self;
+                                             data_type, dim_size, pos_type> self_type;
 
     using two2one_dim_array_adapter_base<one_dim_array_type, data_type, pos_type>::pos;
     using two2one_dim_array_adapter_base<one_dim_array_type, data_type, pos_type>::array;
@@ -374,7 +378,7 @@ struct two2one_dim_array_column_adapter
     two2one_dim_array_column_adapter(one_dim_array_type* a, pos_type p)
         : two2one_dim_array_adapter_base<one_dim_array_type, data_type, pos_type>(a, p)
     { }
-    two2one_dim_array_column_adapter(const _Self& a)
+    two2one_dim_array_column_adapter(const self_type& a)
         : two2one_dim_array_adapter_base<one_dim_array_type, data_type, pos_type>(a)
     { }
 
@@ -403,10 +407,15 @@ struct two2one_dim_array_column_adapter
 };
 #endif
 
-
-template <typename array_type, typename value_type, unsigned_type modulo>
-class array_of_sequences_iterator : public std::iterator<std::random_access_iterator_tag, value_type, unsigned_type>
+template <typename ArrayType, typename ValueType, unsigned_type modulo>
+class array_of_sequences_iterator
+    : public std::iterator<std::random_access_iterator_tag, ValueType, unsigned_type>
 {
+public:
+    typedef ArrayType array_type;
+    typedef ValueType value_type;
+
+protected:
     unsigned_type pos;
     unsigned_type offset;
     array_type* arrays;
@@ -581,37 +590,41 @@ public:
 
 namespace helper {
 
-template <typename BlockType, bool can_use_trivial_pointer>
+template <typename BlockType, typename SizeType, bool CanUseTrivialPointer>
 class element_iterator_generator
 { };
 
 // default case for blocks with fillers or other data: use array_of_sequences_iterator
-template <typename BlockType>
-class element_iterator_generator<BlockType, false>
+template <typename BlockType, typename SizeType>
+class element_iterator_generator<BlockType, SizeType, false>
 {
     typedef BlockType block_type;
     typedef typename block_type::value_type value_type;
 
+    typedef SizeType size_type;
+
 public:
     typedef array_of_sequences_iterator<block_type, value_type, block_type::size> iterator;
 
-    iterator operator () (block_type* blocks, unsigned_type offset) const
+    iterator operator () (block_type* blocks, SizeType offset) const
     {
         return iterator(blocks, offset);
     }
 };
 
 // special case for completely filled blocks: use trivial pointers
-template <typename BlockType>
-class element_iterator_generator<BlockType, true>
+template <typename BlockType, typename SizeType>
+class element_iterator_generator<BlockType, SizeType, true>
 {
     typedef BlockType block_type;
     typedef typename block_type::value_type value_type;
 
+    typedef SizeType size_type;
+
 public:
     typedef value_type* iterator;
 
-    iterator operator () (block_type* blocks, unsigned_type offset) const
+    iterator operator () (block_type* blocks, SizeType offset) const
     {
         return blocks[0].elem + offset;
     }
@@ -619,18 +632,22 @@ public:
 
 } // namespace helper
 
-template <typename BlockType>
+template <typename BlockType, typename SizeType>
 struct element_iterator_traits
 {
-    typedef typename helper::element_iterator_generator<BlockType, BlockType::has_only_data>::iterator element_iterator;
+    typedef typename helper::element_iterator_generator<
+            BlockType, SizeType, BlockType::has_only_data
+            >::iterator element_iterator;
 };
 
-template <typename BlockType>
+template <typename BlockType, typename SizeType>
 inline
-typename element_iterator_traits<BlockType>::element_iterator
-make_element_iterator(BlockType* blocks, unsigned_type offset)
+typename element_iterator_traits<BlockType, SizeType>::element_iterator
+make_element_iterator(BlockType* blocks, SizeType offset)
 {
-    helper::element_iterator_generator<BlockType, BlockType::has_only_data> iter_gen;
+    helper::element_iterator_generator<
+        BlockType, SizeType, BlockType::has_only_data
+        > iter_gen;
     return iter_gen(blocks, offset);
 }
 
diff --git a/include/stxxl/bits/mng/bid.h b/include/stxxl/bits/mng/bid.h
index 0f30b84..838e731 100644
--- a/include/stxxl/bits/mng/bid.h
+++ b/include/stxxl/bits/mng/bid.h
@@ -29,7 +29,6 @@
 #endif
 #define FMT_BID(_bid_) "[" << (_bid_).storage->get_allocator_id() << "]0x" << std::hex << std::setfill('0') << std::setw(8) << (_bid_).offset << "/0x" << std::setw(8) << (_bid_).size
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup mnglayer
@@ -38,13 +37,13 @@ STXXL_BEGIN_NAMESPACE
 //! Block identifier class.
 //!
 //! Stores block identity, given by file and offset within the file
-template <unsigned SIZE>
+template <unsigned Size>
 struct BID
 {
     enum
     {
-        size = SIZE,         //!< Block size
-        t_size = SIZE        //!< Blocks size, given by the parameter
+        size = Size,         //!< Block size
+        t_size = Size        //!< Blocks size, given by the parameter
     };
 
     file* storage;           //!< pointer to the file of the block
@@ -65,7 +64,8 @@ struct BID
     { }
 
     template <unsigned BlockSize>
-    explicit BID(const BID<BlockSize>& obj) : storage(obj.storage), offset(obj.offset)
+    explicit BID(const BID<BlockSize>& obj)
+        : storage(obj.storage), offset(obj.offset)
     { }
 
     template <unsigned BlockSize>
@@ -82,7 +82,6 @@ struct BID
     }
 };
 
-
 //! Specialization of block identifier class (BID) for variable size block size.
 //!
 //! Stores block identity, given by file, offset within the file, and size of the block
@@ -110,20 +109,20 @@ struct BID<0>
     }
 };
 
-template <unsigned blk_sz>
-bool operator == (const BID<blk_sz>& a, const BID<blk_sz>& b)
+template <unsigned BlockSize>
+bool operator == (const BID<BlockSize>& a, const BID<BlockSize>& b)
 {
     return (a.storage == b.storage) && (a.offset == b.offset) && (a.size == b.size);
 }
 
-template <unsigned blk_sz>
-bool operator != (const BID<blk_sz>& a, const BID<blk_sz>& b)
+template <unsigned BlockSize>
+bool operator != (const BID<BlockSize>& a, const BID<BlockSize>& b)
 {
     return (a.storage != b.storage) || (a.offset != b.offset) || (a.size != b.size);
 }
 
-template <unsigned blk_sz>
-std::ostream& operator << (std::ostream& s, const BID<blk_sz>& bid)
+template <unsigned BlockSize>
+std::ostream& operator << (std::ostream& s, const BID<BlockSize>& bid)
 {
     // [0x12345678|0]0x00100000/0x00010000
     // [file ptr|file id]offset/size
@@ -142,16 +141,16 @@ std::ostream& operator << (std::ostream& s, const BID<blk_sz>& bid)
     return s;
 }
 
-template <unsigned BLK_SIZE>
-class BIDArray : public simple_vector<BID<BLK_SIZE> >
+template <unsigned BlockSize>
+class BIDArray : public simple_vector<BID<BlockSize> >
 {
 public:
     BIDArray()
-        : simple_vector<BID<BLK_SIZE> >()
+        : simple_vector<BID<BlockSize> >()
     { }
 
     BIDArray(unsigned_type size)
-        : simple_vector<BID<BLK_SIZE> >(size)
+        : simple_vector<BID<BlockSize> >(size)
     { }
 };
 
diff --git a/include/stxxl/bits/mng/block_alloc.h b/include/stxxl/bits/mng/block_alloc.h
index 921567e..19ade8b 100644
--- a/include/stxxl/bits/mng/block_alloc.h
+++ b/include/stxxl/bits/mng/block_alloc.h
@@ -19,7 +19,6 @@
 #include <stxxl/bits/common/rand.h>
 #include <stxxl/bits/mng/config.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \defgroup alloc Allocation Functors
@@ -217,25 +216,25 @@ struct single_disk
 //! Allocator functor adaptor.
 //!
 //! Gives offset to disk number sequence defined in constructor
-template <class BaseAllocator_>
+template <class BaseAllocator>
 struct offset_allocator
 {
-    BaseAllocator_ base;
+    BaseAllocator base;
     int_type offset;
 
-    //! Creates functor based on instance of \c BaseAllocator_ functor
+    //! Creates functor based on instance of \c BaseAllocator functor
     //! with offset \c offset_.
     //! \param offset_ offset
     //! \param base_ used to create a copy
-    offset_allocator(int_type offset_, const BaseAllocator_& base_) : base(base_), offset(offset_)
+    offset_allocator(int_type offset_, const BaseAllocator& base_) : base(base_), offset(offset_)
     { }
 
-    //! Creates functor based on instance of \c BaseAllocator_ functor.
+    //! Creates functor based on instance of \c BaseAllocator functor.
     //! \param base_ used to create a copy
-    offset_allocator(const BaseAllocator_& base_) : base(base_), offset(0)
+    offset_allocator(const BaseAllocator& base_) : base(base_), offset(0)
     { }
 
-    //! Creates functor based on default \c BaseAllocator_ functor.
+    //! Creates functor based on default \c BaseAllocator functor.
     offset_allocator() : offset(0)
     { }
 
diff --git a/include/stxxl/bits/mng/block_alloc_interleaved.h b/include/stxxl/bits/mng/block_alloc_interleaved.h
index 07510c1..a82ee34 100644
--- a/include/stxxl/bits/mng/block_alloc_interleaved.h
+++ b/include/stxxl/bits/mng/block_alloc_interleaved.h
@@ -19,7 +19,6 @@
 #include <stxxl/bits/mng/block_manager.h>
 #include <stxxl/bits/common/rand.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 #define CHECK_RUN_BOUNDS(pos)
@@ -80,7 +79,6 @@ struct interleaved_SR : public interleaved_striping
     }
 };
 
-
 struct interleaved_RC : public interleaved_striping
 {
     std::vector<std::vector<unsigned_type> > perms;
diff --git a/include/stxxl/bits/mng/block_manager.h b/include/stxxl/bits/mng/block_manager.h
index bdd97d3..b0302eb 100644
--- a/include/stxxl/bits/mng/block_manager.h
+++ b/include/stxxl/bits/mng/block_manager.h
@@ -51,9 +51,7 @@ STXXL_BEGIN_NAMESPACE
 #define STXXL_MNG_COUNT_ALLOCATION 1
 #endif // STXXL_MNG_COUNT_ALLOCATION
 
-//! \defgroup mnglayer Block Management Layer
-//! Group of classes which help controlling external memory space,
-//! managing disks, and allocating and deallocating blocks of external storage
+//! \addtogroup mnglayer
 //! \{
 
 //! Block manager class.
@@ -184,7 +182,6 @@ public:
 #endif // STXXL_MNG_COUNT_ALLOCATION
 };
 
-
 template <class BIDType, class DiskAssignFunctor, class OutputIterator>
 void block_manager::new_blocks_int(
     const unsigned_type nblocks,
@@ -235,9 +232,8 @@ void block_manager::new_blocks_int(
 #endif // STXXL_MNG_COUNT_ALLOCATION
 }
 
-
-template <unsigned BLK_SIZE>
-void block_manager::delete_block(const BID<BLK_SIZE>& bid)
+template <unsigned BlockSize>
+void block_manager::delete_block(const BID<BlockSize>& bid)
 {
     // do not uncomment it
     //assert(bid.storage->get_allocator_id() < config::get_instance()->disks_number());
@@ -249,11 +245,10 @@ void block_manager::delete_block(const BID<BLK_SIZE>& bid)
     disk_files[bid.storage->get_allocator_id()]->discard(bid.offset, bid.size);
 
 #if STXXL_MNG_COUNT_ALLOCATION
-    m_current_allocation -= BLK_SIZE;
+    m_current_allocation -= BlockSize;
 #endif // STXXL_MNG_COUNT_ALLOCATION
 }
 
-
 template <class BIDIteratorClass>
 void block_manager::delete_blocks(
     const BIDIteratorClass& bidbegin,
diff --git a/include/stxxl/bits/mng/block_prefetcher.h b/include/stxxl/bits/mng/block_prefetcher.h
index 1af9d5d..8fe69b3 100644
--- a/include/stxxl/bits/mng/block_prefetcher.h
+++ b/include/stxxl/bits/mng/block_prefetcher.h
@@ -23,26 +23,26 @@
 #include <stxxl/bits/io/iostats.h>
 #include <stxxl/bits/noncopyable.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup schedlayer
 //! \{
 
-
 class set_switch_handler
 {
     onoff_switch& switch_;
     completion_handler on_compl;
 
 public:
-    set_switch_handler(onoff_switch& switch__, const completion_handler& on_compl)
-        : switch_(switch__), on_compl(on_compl)
+    set_switch_handler(onoff_switch& _switch, const completion_handler& on_compl)
+        : switch_(_switch), on_compl(on_compl)
     { }
 
     void operator () (request* req)
     {
-        on_compl(req);  //call before setting switch to on, otherwise, user has no way to wait for the completion handler to be executed
+        // call before setting switch to on, otherwise, user has no way to wait
+        // for the completion handler to be executed
+        on_compl(req);
         switch_.on();
     }
 };
@@ -51,9 +51,13 @@ public:
 //!
 //! \c block_prefetcher overlaps I/Os with consumption of read data.
 //! Utilizes optimal asynchronous prefetch scheduling (by Peter Sanders et.al.)
-template <typename block_type, typename bid_iterator_type>
+template <typename BlockType, typename BidIteratorType>
 class block_prefetcher : private noncopyable
 {
+public:
+    typedef BlockType block_type;
+    typedef BidIteratorType bid_iterator_type;
+
     typedef typename block_type::bid_type bid_type;
 
 protected:
@@ -105,16 +109,15 @@ public:
         bid_iterator_type _cons_end,
         int_type* _pref_seq,
         int_type _prefetch_buf_size,
-        completion_handler do_after_fetch = default_completion_handler()
-        ) :
-        consume_seq_begin(_cons_begin),
-        consume_seq_end(_cons_end),
-        seq_length(_cons_end - _cons_begin),
-        prefetch_seq(_pref_seq),
-        nextread(STXXL_MIN(unsigned_type(_prefetch_buf_size), seq_length)),
-        nextconsume(0),
-        nreadblocks(nextread),
-        do_after_fetch(do_after_fetch)
+        completion_handler do_after_fetch = completion_handler())
+        : consume_seq_begin(_cons_begin),
+          consume_seq_end(_cons_end),
+          seq_length(_cons_end - _cons_begin),
+          prefetch_seq(_pref_seq),
+          nextread(STXXL_MIN(unsigned_type(_prefetch_buf_size), seq_length)),
+          nextconsume(0),
+          nreadblocks(nextread),
+          do_after_fetch(do_after_fetch)
     {
         STXXL_VERBOSE1("block_prefetcher: seq_length=" << seq_length);
         STXXL_VERBOSE1("block_prefetcher: _prefetch_buf_size=" << _prefetch_buf_size);
@@ -186,7 +189,6 @@ public:
         if (nextconsume >= seq_length)
             return false;
 
-
         buffer = wait(nextconsume++);
 
         return true;
@@ -212,7 +214,6 @@ public:
             if (read_reqs[i].valid())
                 read_reqs[i]->wait();
 
-
         delete[] read_reqs;
         delete[] read_bids;
         delete[] completed;
diff --git a/include/stxxl/bits/mng/block_scheduler.h b/include/stxxl/bits/mng/block_scheduler.h
index bdd3a26..9f0c689 100644
--- a/include/stxxl/bits/mng/block_scheduler.h
+++ b/include/stxxl/bits/mng/block_scheduler.h
@@ -137,7 +137,7 @@ public:
 
     //! Read asyncronusly from external_block to internal_block. Has to be internal and have an external_block.
     //! \return A request pointer to the I/O.
-    request_ptr read_async(completion_handler on_cmpl = default_completion_handler())
+    request_ptr read_async(completion_handler on_cmpl = completion_handler())
     {
         assert(is_internal());
         assert(has_external_block());
@@ -154,7 +154,7 @@ public:
 
     //! Write asyncronusly from internal_block to external_block if necessary.
     //! \return A request pointer to the I/O, an invalid request pointer if not necessary.
-    request_ptr clean_async(completion_handler on_cmpl = default_completion_handler())
+    request_ptr clean_async(completion_handler on_cmpl = completion_handler())
     {
         if (! is_dirty())
             return request_ptr();
@@ -1598,7 +1598,7 @@ protected:
 
     void deinit()
     {
-        // todo remove
+        // TODO remove
         if (! scheduled_blocks.empty())
             STXXL_MSG("deinit while scheduled_blocks not empty");
         if (! scheduled_evictable_blocks.empty())
diff --git a/include/stxxl/bits/mng/buf_istream.h b/include/stxxl/bits/mng/buf_istream.h
index 15570af..d466b16 100644
--- a/include/stxxl/bits/mng/buf_istream.h
+++ b/include/stxxl/bits/mng/buf_istream.h
@@ -18,27 +18,24 @@
 #include <stxxl/bits/noncopyable.h>
 #include <stxxl/bits/algo/async_schedule.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup schedlayer
 //! \{
 
-
 // a paranoid check
 #define BUF_ISTREAM_CHECK_END
 
-
 //! Buffered input stream.
 //!
 //! Reads data records from the stream of blocks.
 //! \remark Reading performed in the background, i.e. with overlapping of I/O and computation
-template <typename BlockType, typename BIDIteratorType>
+template <typename BlockType, typename BidIteratorType>
 class buf_istream : private noncopyable
 {
 public:
     typedef BlockType block_type;
-    typedef BIDIteratorType bid_iterator_type;
+    typedef BidIteratorType bid_iterator_type;
 
 private:
     buf_istream() { }
@@ -55,34 +52,33 @@ protected:
 
 public:
     typedef typename block_type::reference reference;
-    typedef buf_istream<block_type, bid_iterator_type> _Self;
+    typedef buf_istream<block_type, bid_iterator_type> self_type;
 
     //! Constructs input stream object.
-    //! \param _begin \c bid_iterator pointing to the first block of the stream
-    //! \param _end \c bid_iterator pointing to the ( \b last + 1 ) block of the stream
+    //! \param begin \c bid_iterator pointing to the first block of the stream
+    //! \param end \c bid_iterator pointing to the ( \b last + 1 ) block of the stream
     //! \param nbuffers number of buffers for internal use
-    buf_istream(bid_iterator_type _begin, bid_iterator_type _end, int_type nbuffers) :
-        current_elem(0)
+    buf_istream(bid_iterator_type begin, bid_iterator_type end, unsigned_type nbuffers)
+        : current_elem(0)
 #ifdef BUF_ISTREAM_CHECK_END
-        , not_finished(true)
+          , not_finished(true)
 #endif
     {
-        //int_type i;
         const unsigned_type ndisks = config::get_instance()->disks_number();
-        const int_type seq_length = _end - _begin;
+        const unsigned_type mdevid = config::get_instance()->get_max_device_id();
+        const int_type seq_length = end - begin;
         prefetch_seq = new int_type[seq_length];
 
         // obvious schedule
-        //for(int_type i = 0; i< seq_length; ++i)
+        //for(int_type i = 0; i < seq_length; ++i)
         //	prefetch_seq[i] = i;
 
         // optimal schedule
         nbuffers = STXXL_MAX(2 * ndisks, unsigned_type(nbuffers - 1));
-        compute_prefetch_schedule(_begin, _end, prefetch_seq,
-                                  nbuffers, ndisks);
-
+        compute_prefetch_schedule(begin, end, prefetch_seq,
+                                  nbuffers, mdevid);
 
-        prefetcher = new prefetcher_type(_begin, _end, prefetch_seq, nbuffers);
+        prefetcher = new prefetcher_type(begin, end, prefetch_seq, nbuffers);
 
         current_blk = prefetcher->pull_block();
     }
@@ -91,7 +87,7 @@ public:
     //! \param record reference to the block record type,
     //!        contains value of the next record in the stream after the call of the operator
     //! \return reference to itself (stream object)
-    _Self& operator >> (reference record)
+    self_type& operator >> (reference record)
     {
 #ifdef BUF_ISTREAM_CHECK_END
         assert(not_finished);
@@ -126,7 +122,7 @@ public:
 
     //! Moves to the next record in the stream.
     //! \return reference to itself after the advance
-    _Self& operator ++ ()
+    self_type& operator ++ ()
     {
 #ifdef BUF_ISTREAM_CHECK_END
         assert(not_finished);
diff --git a/include/stxxl/bits/mng/buf_istream_reverse.h b/include/stxxl/bits/mng/buf_istream_reverse.h
index f71b9fc..433dacc 100644
--- a/include/stxxl/bits/mng/buf_istream_reverse.h
+++ b/include/stxxl/bits/mng/buf_istream_reverse.h
@@ -20,30 +20,27 @@
 #include <stxxl/bits/noncopyable.h>
 #include <stxxl/bits/algo/async_schedule.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup schedlayer
 //! \{
 
-
 // a paranoid check
 #define BUF_ISTREAM_CHECK_END
 
-
 //! Buffered input stream, reading the items in the blocks in reverse order.
 //!
 //! Reads data records from the stream of blocks in reverse order.
 //! \remark Reading performed in the background, i.e. with overlapping of I/O and computation
-template <typename BlockType, typename BIDIteratorType>
+template <typename BlockType, typename BidIteratorType>
 class buf_istream_reverse : private noncopyable
 {
 public:
     typedef BlockType block_type;
-    typedef BIDIteratorType bid_iterator_type;
+    typedef BidIteratorType bid_iterator_type;
 
     //-tb note that we redefine the BID type here, because there is no way to
-    //-derive it from BIDIteratorType (which is usually just a POD pointer).
+    //-derive it from BidIteratorType (which is usually just a POD pointer).
     typedef BIDArray<block_type::raw_size> bid_vector_type;
 
 private:
@@ -62,7 +59,7 @@ protected:
 
 public:
     typedef typename block_type::reference reference;
-    typedef buf_istream_reverse<block_type, bid_iterator_type> _Self;
+    typedef buf_istream_reverse<block_type, bid_iterator_type> self_type;
 
     //! Constructs input stream object, reading [first,last) blocks in reverse.
     //! \param begin \c bid_iterator pointing to the first block of the stream
@@ -80,13 +77,14 @@ public:
 
         // calculate prefetch sequence
         const unsigned_type ndisks = config::get_instance()->disks_number();
+        const unsigned_type mdevid = config::get_instance()->get_max_device_id();
 
         prefetch_seq = new int_type[bids_.size()];
 
         // optimal schedule
         nbuffers = STXXL_MAX(2 * ndisks, unsigned_type(nbuffers - 1));
         compute_prefetch_schedule(bids_.begin(), bids_.end(), prefetch_seq,
-                                  nbuffers, ndisks);
+                                  nbuffers, mdevid);
 
         // create stream prefetcher
         prefetcher = new prefetcher_type(bids_.begin(), bids_.end(), prefetch_seq, nbuffers);
@@ -100,7 +98,7 @@ public:
     //! \param record reference to the block record type,
     //!        contains value of the next record in the stream after the call of the operator
     //! \return reference to itself (stream object)
-    _Self& operator >> (reference record)
+    self_type& operator >> (reference record)
     {
 #ifdef BUF_ISTREAM_CHECK_END
         assert(not_finished);
@@ -135,7 +133,7 @@ public:
 
     //! Moves to the _previous_ record in the stream.
     //! \return reference to itself after the advance
-    _Self& operator ++ ()
+    self_type& operator ++ ()
     {
 #ifdef BUF_ISTREAM_CHECK_END
         assert(not_finished);
diff --git a/include/stxxl/bits/mng/buf_ostream.h b/include/stxxl/bits/mng/buf_ostream.h
index e5b5cc8..048116f 100644
--- a/include/stxxl/bits/mng/buf_ostream.h
+++ b/include/stxxl/bits/mng/buf_ostream.h
@@ -16,23 +16,21 @@
 #include <stxxl/bits/noncopyable.h>
 #include <stxxl/bits/mng/buf_writer.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup schedlayer
 //! \{
 
-
 //! Buffered output stream.
 //!
 //! Writes data records to the stream of blocks.
 //! \remark Writing performed in the background, i.e. with overlapping of I/O and computation
-template <typename BlockType, typename BIDIteratorType>
+template <typename BlockType, typename BidIteratorType>
 class buf_ostream : private noncopyable
 {
 public:
     typedef BlockType block_type;
-    typedef BIDIteratorType bid_iterator_type;
+    typedef BidIteratorType bid_iterator_type;
 
 protected:
     buffered_writer<block_type> writer;
@@ -43,14 +41,14 @@ protected:
 public:
     typedef typename block_type::const_reference const_reference;
     typedef typename block_type::reference reference;
-    typedef buf_ostream<block_type, bid_iterator_type> _Self;
+    typedef buf_ostream<block_type, bid_iterator_type> self_type;
 
     //! Constructs output stream object.
     //! \param first_bid \c bid_iterator pointing to the first block of the stream
     //! \param nbuffers number of buffers for internal use
-    buf_ostream(bid_iterator_type first_bid, int_type nbuffers) :
-        writer(nbuffers, nbuffers / 2), current_bid(first_bid),
-        current_elem(0)
+    buf_ostream(bid_iterator_type first_bid, int_type nbuffers)
+        : writer(nbuffers, nbuffers / 2), current_bid(first_bid),
+          current_elem(0)
     {
         current_blk = writer.get_free_block();
     }
@@ -58,7 +56,7 @@ public:
     //! Output stream operator, writes out \c record.
     //! \param record const reference to block record type, containing a value of record to write to the stream
     //! \return reference to itself (stream object)
-    _Self& operator << (const_reference record)
+    self_type& operator << (const_reference record)
     {
         current_blk->elem[current_elem++] = record;
         if (UNLIKELY(current_elem >= block_type::size))
@@ -85,7 +83,7 @@ public:
 
     //! Moves to the next record in the stream.
     //! \return reference to itself after the advance
-    _Self& operator ++ ()
+    self_type& operator ++ ()
     {
         ++current_elem;
         if (UNLIKELY(current_elem >= block_type::size))
@@ -97,7 +95,7 @@ public:
     }
 
     //! Fill current block with padding and flush
-    _Self & fill(const_reference record)
+    self_type & fill(const_reference record)
     {
         while (current_elem != 0)
         {
@@ -108,7 +106,7 @@ public:
 
     //! Force flush of current block, for finishing writing within a block.
     //! \warning Use with caution as the block may contain uninitialized data
-    _Self & flush()
+    self_type & flush()
     {
         current_elem = 0;
         current_blk = writer.write(current_blk, *(current_bid++));
diff --git a/include/stxxl/bits/mng/buf_writer.h b/include/stxxl/bits/mng/buf_writer.h
index 2db5818..2aa81b3 100644
--- a/include/stxxl/bits/mng/buf_writer.h
+++ b/include/stxxl/bits/mng/buf_writer.h
@@ -20,7 +20,6 @@
 #include <stxxl/bits/io/disk_queues.h>
 #include <stxxl/bits/noncopyable.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \defgroup schedlayer Block Scheduling Sublayer
@@ -30,16 +29,16 @@ STXXL_BEGIN_NAMESPACE
 //! via prefetching and buffered writing
 //! \{
 
-
 //! Encapsulates asynchronous buffered block writing engine.
 //!
 //! \c buffered_writer overlaps I/Os with filling of output buffer.
-template <typename block_type>
+template <typename BlockType>
 class buffered_writer : private noncopyable
 {
-protected:
+    typedef BlockType block_type;
     typedef typename block_type::bid_type bid_type;
 
+protected:
     const unsigned_type nwriteblocks;
     block_type* write_buffers;
     bid_type* write_bids;
@@ -72,12 +71,13 @@ public:
     //! \param write_buf_size number of write buffers to use
     //! \param write_batch_size number of blocks to accumulate in
     //!        order to flush write requests (bulk buffered writing)
-    buffered_writer(unsigned_type write_buf_size, unsigned_type write_batch_size) :
-        nwriteblocks((write_buf_size > 2) ? write_buf_size : 2),
-        writebatchsize(write_batch_size ? write_batch_size : 1)
+    buffered_writer(unsigned_type write_buf_size, unsigned_type write_batch_size)
+        : nwriteblocks((write_buf_size > 2) ? write_buf_size : 2),
+          writebatchsize(write_batch_size ? write_batch_size : 1)
     {
         write_buffers = new block_type[nwriteblocks];
         write_reqs = new request_ptr[nwriteblocks];
+
         write_bids = new bid_type[nwriteblocks];
 
         for (unsigned_type i = 0; i < nwriteblocks; i++)
@@ -186,7 +186,7 @@ public:
     }
 
     //! Flushes not yet written buffers and frees used memory.
-    virtual ~buffered_writer()
+    ~buffered_writer()
     {
         int_type ibuffer;
         while (!batch_write_blocks.empty())
diff --git a/include/stxxl/bits/mng/config.h b/include/stxxl/bits/mng/config.h
index 6070429..ea6a1a0 100644
--- a/include/stxxl/bits/mng/config.h
+++ b/include/stxxl/bits/mng/config.h
@@ -24,7 +24,6 @@
 #include <stxxl/bits/singleton.h>
 #include <stxxl/bits/common/log.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup mnglayer
@@ -91,12 +90,19 @@ public:
     //! different disks. queue=-1 -> default queue (one for each disk).
     int queue;
 
+    //! the selected physical device id (e.g. for calculating prefetching
+    //! sequences). If -1 then the device id is chosen automatically.
+    unsigned int device_id;
+
     //! turned on by syscall fileio when the path points to a raw block device
     bool raw_device;
 
     //! unlink file immediately after opening (available on most Unix)
     bool unlink_on_open;
 
+    //! desired queue length for linuxaio_file and linuxaio_queue
+    int queue_length;
+
     //! \}
 };
 
@@ -169,6 +175,25 @@ public:
 
     //! \}
 
+protected:
+    //! \name Automatic Disk Enumeration Functions
+    //! \{
+
+    //! static counter for automatic physical device enumeration
+    unsigned int m_max_device_id;
+
+public:
+    //! Returns automatic physical device id counter
+    unsigned int get_max_device_id();
+
+    //! Returns next automatic physical device id counter
+    unsigned int get_next_device_id();
+
+    //! Update the automatic physical device id counter
+    void update_max_device_id(unsigned int devid);
+
+    //! \}
+
 public:
     //! \name Query Functions
     //! \{
diff --git a/include/stxxl/bits/mng/disk_allocator.h b/include/stxxl/bits/mng/disk_allocator.h
index f0e4a79..67f9145 100644
--- a/include/stxxl/bits/mng/disk_allocator.h
+++ b/include/stxxl/bits/mng/disk_allocator.h
@@ -33,7 +33,6 @@
 #include <ostream>
 #include <utility>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \ingroup mnglayer
@@ -117,30 +116,30 @@ public:
         return disk_bytes;
     }
 
-    template <unsigned BLK_SIZE>
-    void new_blocks(BIDArray<BLK_SIZE>& bids)
+    template <unsigned BlockSize>
+    void new_blocks(BIDArray<BlockSize>& bids)
     {
         new_blocks(bids.begin(), bids.end());
     }
 
-    template <unsigned BLK_SIZE>
-    void new_blocks(BID<BLK_SIZE>* begin, BID<BLK_SIZE>* end);
+    template <unsigned BlockSize>
+    void new_blocks(BID<BlockSize>* begin, BID<BlockSize>* end);
 
 #if 0
-    template <unsigned BLK_SIZE>
-    void delete_blocks(const BIDArray<BLK_SIZE>& bids)
+    template <unsigned BlockSize>
+    void delete_blocks(const BIDArray<BlockSize>& bids)
     {
         for (unsigned i = 0; i < bids.size(); ++i)
             delete_block(bids[i]);
     }
 #endif
 
-    template <unsigned BLK_SIZE>
-    void delete_block(const BID<BLK_SIZE>& bid)
+    template <unsigned BlockSize>
+    void delete_block(const BID<BlockSize>& bid)
     {
         scoped_mutex_lock lock(mutex);
 
-        STXXL_VERBOSE2("disk_allocator::delete_block<" << BLK_SIZE <<
+        STXXL_VERBOSE2("disk_allocator::delete_block<" << BlockSize <<
                        ">(pos=" << bid.offset << ", size=" << bid.size <<
                        "), free:" << free_bytes << " total:" << disk_bytes);
 
@@ -148,12 +147,12 @@ public:
     }
 };
 
-template <unsigned BLK_SIZE>
-void disk_allocator::new_blocks(BID<BLK_SIZE>* begin, BID<BLK_SIZE>* end)
+template <unsigned BlockSize>
+void disk_allocator::new_blocks(BID<BlockSize>* begin, BID<BlockSize>* end)
 {
     stxxl::int64 requested_size = 0;
 
-    for (typename BIDArray<BLK_SIZE>::iterator cur = begin; cur != end; ++cur)
+    for (typename BIDArray<BlockSize>::iterator cur = begin; cur != end; ++cur)
     {
         STXXL_VERBOSE2("Asking for a block with size: " << (cur->size));
         requested_size += cur->size;
@@ -161,7 +160,7 @@ void disk_allocator::new_blocks(BID<BLK_SIZE>* begin, BID<BLK_SIZE>* end)
 
     scoped_mutex_lock lock(mutex);
 
-    STXXL_VERBOSE2("disk_allocator::new_blocks<BLK_SIZE>,  BLK_SIZE = " << BLK_SIZE <<
+    STXXL_VERBOSE2("disk_allocator::new_blocks<BlockSize>,  BlockSize = " << BlockSize <<
                    ", free:" << free_bytes << " total:" << disk_bytes <<
                    ", blocks: " << (end - begin) <<
                    " begin: " << static_cast<void*>(begin) <<
@@ -185,7 +184,7 @@ void disk_allocator::new_blocks(BID<BLK_SIZE>* begin, BID<BLK_SIZE>* end)
     space = std::find_if(free_space.begin(), free_space.end(),
                          bind2nd(first_fit(), requested_size) _STXXL_FORCE_SEQUENTIAL);
 
-    if (space == free_space.end() && requested_size == BLK_SIZE)
+    if (space == free_space.end() && requested_size == BlockSize)
     {
         assert(end - begin == 1);
 
@@ -198,7 +197,7 @@ void disk_allocator::new_blocks(BID<BLK_SIZE>* begin, BID<BLK_SIZE>* end)
                          " bytes free. Trying to extend the external memory space...");
         }
 
-        grow_file(BLK_SIZE);
+        grow_file(BlockSize);
 
         space = std::find_if(free_space.begin(), free_space.end(),
                              bind2nd(first_fit(), requested_size) _STXXL_FORCE_SEQUENTIAL);
@@ -227,12 +226,12 @@ void disk_allocator::new_blocks(BID<BLK_SIZE>* begin, BID<BLK_SIZE>* end)
     STXXL_VERBOSE1("Warning, when allocating an external memory space, no contiguous region found");
     STXXL_VERBOSE1("It might harm the performance");
 
-    assert(requested_size > BLK_SIZE);
+    assert(requested_size > BlockSize);
     assert(end - begin > 1);
 
     lock.unlock();
 
-    BID<BLK_SIZE>* middle = begin + ((end - begin) / 2);
+    BID<BlockSize>* middle = begin + ((end - begin) / 2);
     new_blocks(begin, middle);
     new_blocks(middle, end);
 }
diff --git a/include/stxxl/bits/mng/prefetch_pool.h b/include/stxxl/bits/mng/prefetch_pool.h
index e475d7c..cb10a48 100644
--- a/include/stxxl/bits/mng/prefetch_pool.h
+++ b/include/stxxl/bits/mng/prefetch_pool.h
@@ -19,7 +19,6 @@
 #include <stxxl/bits/mng/write_pool.h>
 #include <stxxl/bits/compat/hash_map.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup schedlayer
@@ -265,7 +264,6 @@ public:
             while (--diff >= 0)
                 free_blocks.push_back(new block_type);
 
-
             return size();
         }
 
diff --git a/include/stxxl/bits/mng/read_write_pool.h b/include/stxxl/bits/mng/read_write_pool.h
index 15b0874..6bdeac2 100644
--- a/include/stxxl/bits/mng/read_write_pool.h
+++ b/include/stxxl/bits/mng/read_write_pool.h
@@ -16,7 +16,6 @@
 #include <stxxl/bits/mng/write_pool.h>
 #include <stxxl/bits/mng/prefetch_pool.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup schedlayer
@@ -43,15 +42,15 @@ public:
     //! Constructs pool.
     //! \param init_size_prefetch initial number of blocks in the prefetch pool
     //! \param init_size_write initial number of blocks in the write pool
-    explicit read_write_pool(size_type init_size_prefetch = 1, size_type init_size_write = 1) :
-        delete_pools(true)
+    explicit read_write_pool(size_type init_size_prefetch = 1, size_type init_size_write = 1)
+        : delete_pools(true)
     {
         w_pool = new write_pool_type(init_size_write);
         p_pool = new prefetch_pool_type(init_size_prefetch);
     }
 
-    STXXL_DEPRECATED(read_write_pool(prefetch_pool_type& p_pool, write_pool_type& w_pool)) :
-        w_pool(&w_pool), p_pool(&p_pool), delete_pools(false)
+    STXXL_DEPRECATED(read_write_pool(prefetch_pool_type& p_pool, write_pool_type& w_pool))
+        : w_pool(&w_pool), p_pool(&p_pool), delete_pools(false)
     { }
 
     void swap(read_write_pool& obj)
@@ -62,7 +61,7 @@ public:
     }
 
     //! Waits for completion of all ongoing requests and frees memory.
-    virtual ~read_write_pool()
+    ~read_write_pool()
     {
         if (delete_pools) {
             delete w_pool;
@@ -157,7 +156,6 @@ public:
 
 STXXL_END_NAMESPACE
 
-
 namespace std {
 
 template <class BlockType>
diff --git a/include/stxxl/bits/mng/typed_block.h b/include/stxxl/bits/mng/typed_block.h
index 8b86f68..a134941 100644
--- a/include/stxxl/bits/mng/typed_block.h
+++ b/include/stxxl/bits/mng/typed_block.h
@@ -27,7 +27,6 @@
 #define STXXL_VERBOSE_TYPED_BLOCK STXXL_VERBOSE2
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup mnglayer
@@ -169,8 +168,8 @@ public:
 
 //! Contains per block information for \c stxxl::typed_block , not intended for direct use.
 template <typename Type, unsigned RawSize, unsigned NBids, typename MetaInfoType = void>
-class block_w_info :
-    public block_w_bids<Type, ((RawSize - sizeof(BID<RawSize>)* NBids - sizeof(MetaInfoType)) / sizeof(Type)), RawSize, NBids>
+class block_w_info
+    : public block_w_bids<Type, ((RawSize - sizeof(BID<RawSize>)* NBids - sizeof(MetaInfoType)) / sizeof(Type)), RawSize, NBids>
 {
 public:
     //! Type of per block information element.
@@ -183,8 +182,8 @@ public:
 };
 
 template <typename Type, unsigned RawSize, unsigned NBids>
-class block_w_info<Type, RawSize, NBids, void>:
-    public block_w_bids<Type, ((RawSize - sizeof(BID<RawSize>)* NBids) / sizeof(Type)), RawSize, NBids>
+class block_w_info<Type, RawSize, NBids, void>
+    : public block_w_bids<Type, ((RawSize - sizeof(BID<RawSize>)* NBids) / sizeof(Type)), RawSize, NBids>
 {
 public:
     typedef void info_type;
@@ -235,8 +234,8 @@ class expand_struct : public add_filler<Type, RawSize - sizeof(Type)>
 //! function variable for example), because Linux POSIX library limits the stack size for the
 //! main thread to (2MB - system page size)
 template <unsigned RawSize, typename Type, unsigned NRef = 0, typename MetaInfoType = void>
-class typed_block :
-    public mng_local::expand_struct<mng_local::block_w_info<Type, RawSize, NRef, MetaInfoType>, RawSize>
+class typed_block
+    : public mng_local::expand_struct<mng_local::block_w_info<Type, RawSize, NRef, MetaInfoType>, RawSize>
 {
     typedef mng_local::expand_struct<mng_local::block_w_info<Type, RawSize, NRef, MetaInfoType>, RawSize> Base;
 
@@ -263,7 +262,7 @@ public:
         STXXL_STATIC_ASSERT(sizeof(typed_block) == raw_size);
         STXXL_VERBOSE_TYPED_BLOCK("[" << (void*)this << "] typed_block is constructed");
 #if 0
-        assert(((long)this) % BLOCK_ALIGN == 0);
+        assert(((long)this) % STXXL_BLOCK_ALIGN == 0);
 #endif
     }
 
@@ -282,7 +281,7 @@ public:
      *! \return \c pointer_ptr object to track status I/O operation after the call
      */
     request_ptr write(const bid_type& bid,
-                      completion_handler on_cmpl = default_completion_handler())
+                      completion_handler on_cmpl = completion_handler())
     {
         STXXL_VERBOSE_BLOCK_LIFE_CYCLE("BLC:write  " << FMT_BID(bid));
         return bid.storage->awrite(this, bid.offset, raw_size, on_cmpl);
@@ -294,7 +293,7 @@ public:
      *! \return \c pointer_ptr object to track status I/O operation after the call
      */
     request_ptr read(const bid_type& bid,
-                     completion_handler on_cmpl = default_completion_handler())
+                     completion_handler on_cmpl = completion_handler())
     {
         STXXL_VERBOSE_BLOCK_LIFE_CYCLE("BLC:read   " << FMT_BID(bid));
         return bid.storage->aread(this, bid.offset, raw_size, on_cmpl);
@@ -303,9 +302,11 @@ public:
     static void* operator new (size_t bytes)
     {
         unsigned_type meta_info_size = bytes % raw_size;
-        STXXL_VERBOSE1("typed::block operator new[]: bytes=" << bytes << ", meta_info_size=" << meta_info_size);
+        STXXL_VERBOSE_TYPED_BLOCK("typed::block operator new[]: bytes=" << bytes << ", meta_info_size=" << meta_info_size);
+
+        void* result = aligned_alloc<STXXL_BLOCK_ALIGN>(
+            bytes - meta_info_size, meta_info_size);
 
-        void* result = aligned_alloc<BLOCK_ALIGN>(bytes - meta_info_size, meta_info_size);
 #if STXXL_WITH_VALGRIND || STXXL_TYPED_BLOCK_INITIALIZE_ZERO
         memset(result, 0, bytes);
 #endif
@@ -315,9 +316,11 @@ public:
     static void* operator new[] (size_t bytes)
     {
         unsigned_type meta_info_size = bytes % raw_size;
-        STXXL_VERBOSE1("typed::block operator new[]: bytes=" << bytes << ", meta_info_size=" << meta_info_size);
+        STXXL_VERBOSE_TYPED_BLOCK("typed::block operator new[]: bytes=" << bytes << ", meta_info_size=" << meta_info_size);
+
+        void* result = aligned_alloc<STXXL_BLOCK_ALIGN>(
+            bytes - meta_info_size, meta_info_size);
 
-        void* result = aligned_alloc<BLOCK_ALIGN>(bytes - meta_info_size, meta_info_size);
 #if STXXL_WITH_VALGRIND || STXXL_TYPED_BLOCK_INITIALIZE_ZERO
         memset(result, 0, bytes);
 #endif
@@ -331,12 +334,12 @@ public:
 
     static void operator delete (void* ptr)
     {
-        aligned_dealloc<BLOCK_ALIGN>(ptr);
+        aligned_dealloc<STXXL_BLOCK_ALIGN>(ptr);
     }
 
     static void operator delete[] (void* ptr)
     {
-        aligned_dealloc<BLOCK_ALIGN>(ptr);
+        aligned_dealloc<STXXL_BLOCK_ALIGN>(ptr);
     }
 
     static void operator delete (void*, void*)
diff --git a/include/stxxl/bits/mng/write_pool.h b/include/stxxl/bits/mng/write_pool.h
index ca5a63b..8c2f11f 100644
--- a/include/stxxl/bits/mng/write_pool.h
+++ b/include/stxxl/bits/mng/write_pool.h
@@ -22,13 +22,11 @@
 
 #define STXXL_VERBOSE_WPOOL(msg) STXXL_VERBOSE1("write_pool[" << static_cast<void*>(this) << "]" << msg)
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! \addtogroup schedlayer
 //! \{
 
-
 //! Implements dynamically resizable buffered writing pool.
 template <class BlockType>
 class write_pool : private noncopyable
@@ -46,8 +44,8 @@ public:
 
         busy_entry() : block(NULL) { }
         busy_entry(const busy_entry& a) : block(a.block), req(a.req), bid(a.bid) { }
-        busy_entry(block_type*& bl, request_ptr& r, bid_type& bi) :
-            block(bl), req(r), bid(bi) { }
+        busy_entry(block_type*& bl, request_ptr& r, bid_type& bi)
+            : block(bl), req(r), bid(bi) { }
 
         operator request_ptr () { return req; }
     };
@@ -79,7 +77,7 @@ public:
     }
 
     //! Waits for completion of all ongoing write requests and frees memory.
-    virtual ~write_pool()
+    ~write_pool()
     {
         STXXL_VERBOSE_WPOOL("::~write_pool free_blocks.size()=" << free_blocks.size() <<
                             " busy_blocks.size()=" << busy_blocks.size());
@@ -276,7 +274,6 @@ protected:
 
 STXXL_END_NAMESPACE
 
-
 namespace std {
 
 template <class BlockType>
diff --git a/include/stxxl/bits/parallel.h b/include/stxxl/bits/parallel.h
index 484b649..e4088b3 100644
--- a/include/stxxl/bits/parallel.h
+++ b/include/stxxl/bits/parallel.h
@@ -43,7 +43,6 @@
 #include <stxxl/bits/common/settings.h>
 #include <stxxl/bits/verbose.h>
 
-
 #if defined(_GLIBCXX_PARALLEL)
 //use _STXXL_FORCE_SEQUENTIAL to tag calls which are not worthwhile parallelizing
 #define _STXXL_FORCE_SEQUENTIAL , __gnu_parallel::sequential_tag()
@@ -83,7 +82,6 @@
 #include <algorithm>
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 inline unsigned sort_memory_usage_factor()
@@ -123,7 +121,6 @@ inline bool do_parallel_merge()
 #endif
 }
 
-
 namespace potentially_parallel {
 
 #if STXXL_PARALLEL_MODE_EXPLICIT
diff --git a/include/stxxl/bits/singleton.h b/include/stxxl/bits/singleton.h
index 8c92652..6c31fb3 100644
--- a/include/stxxl/bits/singleton.h
+++ b/include/stxxl/bits/singleton.h
@@ -20,7 +20,6 @@
 #include <stxxl/bits/common/mutex.h>
 #include <stxxl/bits/common/exithandler.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 template <typename INSTANCE, bool destroy_on_exit = true>
diff --git a/include/stxxl/bits/stream/choose.h b/include/stxxl/bits/stream/choose.h
index 1fee966..102d226 100644
--- a/include/stxxl/bits/stream/choose.h
+++ b/include/stxxl/bits/stream/choose.h
@@ -16,7 +16,6 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! Stream package subnamespace.
@@ -26,28 +25,28 @@ namespace stream {
 //     CHOOSE                                                         //
 ////////////////////////////////////////////////////////////////////////
 
-template <class Input_, int Which>
+template <class Input, int Which>
 class choose
 { };
 
 //! Creates stream from a tuple stream taking the first component of each tuple.
 //!
-//! \tparam Input_ type of the input tuple stream
+//! \tparam Input type of the input tuple stream
 //!
 //! \remark Tuple stream is a stream which \c value_type is \c stxxl::tuple .
-template <class Input_>
-class choose<Input_, 1>
+template <class Input>
+class choose<Input, 1>
 {
-    Input_& in;
+    Input& in;
 
-    typedef typename Input_::value_type tuple_type;
+    typedef typename Input::value_type tuple_type;
 
 public:
     //! Standard stream typedef.
     typedef typename tuple_type::first_type value_type;
 
     //! Construction.
-    choose(Input_& in_) : in(in_)
+    choose(Input& in_) : in(in_)
     { }
 
     //! Standard stream method.
@@ -77,22 +76,22 @@ public:
 
 //! Creates stream from a tuple stream taking the second component of each tuple.
 //!
-//! \tparam Input_ type of the input tuple stream
+//! \tparam Input type of the input tuple stream
 //!
 //! \remark Tuple stream is a stream which \c value_type is \c stxxl::tuple .
-template <class Input_>
-class choose<Input_, 2>
+template <class Input>
+class choose<Input, 2>
 {
-    Input_& in;
+    Input& in;
 
-    typedef typename Input_::value_type tuple_type;
+    typedef typename Input::value_type tuple_type;
 
 public:
     //! Standard stream typedef.
     typedef typename tuple_type::second_type value_type;
 
     //! Construction.
-    choose(Input_& in_) : in(in_)
+    choose(Input& in_) : in(in_)
     { }
 
     //! Standard stream method.
@@ -122,22 +121,22 @@ public:
 
 //! Creates stream from a tuple stream taking the third component of each tuple.
 //!
-//! \tparam Input_ type of the input tuple stream
+//! \tparam Input type of the input tuple stream
 //!
 //! \remark Tuple stream is a stream which \c value_type is \c stxxl::tuple .
-template <class Input_>
-class choose<Input_, 3>
+template <class Input>
+class choose<Input, 3>
 {
-    Input_& in;
+    Input& in;
 
-    typedef typename Input_::value_type tuple_type;
+    typedef typename Input::value_type tuple_type;
 
 public:
     //! Standard stream typedef.
     typedef typename tuple_type::third_type value_type;
 
     //! Construction.
-    choose(Input_& in_) : in(in_)
+    choose(Input& in_) : in(in_)
     { }
 
     //! Standard stream method.
@@ -167,22 +166,22 @@ public:
 
 //! Creates stream from a tuple stream taking the fourth component of each tuple.
 //!
-//! \tparam Input_ type of the input tuple stream
+//! \tparam Input type of the input tuple stream
 //!
 //! \remark Tuple stream is a stream which \c value_type is \c stxxl::tuple .
-template <class Input_>
-class choose<Input_, 4>
+template <class Input>
+class choose<Input, 4>
 {
-    Input_& in;
+    Input& in;
 
-    typedef typename Input_::value_type tuple_type;
+    typedef typename Input::value_type tuple_type;
 
 public:
     //! Standard stream typedef.
     typedef typename tuple_type::fourth_type value_type;
 
     //! Construction.
-    choose(Input_& in_) : in(in_)
+    choose(Input& in_) : in(in_)
     { }
 
     //! Standard stream method.
@@ -212,22 +211,22 @@ public:
 
 //! Creates stream from a tuple stream taking the fifth component of each tuple.
 //!
-//! \tparam Input_ type of the input tuple stream
+//! \tparam Input type of the input tuple stream
 //!
 //! \remark Tuple stream is a stream which \c value_type is \c stxxl::tuple .
-template <class Input_>
-class choose<Input_, 5>
+template <class Input>
+class choose<Input, 5>
 {
-    Input_& in;
+    Input& in;
 
-    typedef typename Input_::value_type tuple_type;
+    typedef typename Input::value_type tuple_type;
 
 public:
     //! Standard stream typedef.
     typedef typename tuple_type::fifth_type value_type;
 
     //! Construction.
-    choose(Input_& in_) : in(in_)
+    choose(Input& in_) : in(in_)
     { }
 
     //! Standard stream method.
@@ -257,22 +256,22 @@ public:
 
 //! Creates stream from a tuple stream taking the sixth component of each tuple.
 //!
-//! \tparam Input_ type of the input tuple stream
+//! \tparam Input type of the input tuple stream
 //!
 //! \remark Tuple stream is a stream which \c value_type is \c stxxl::tuple .
-template <class Input_>
-class choose<Input_, 6>
+template <class Input>
+class choose<Input, 6>
 {
-    Input_& in;
+    Input& in;
 
-    typedef typename Input_::value_type tuple_type;
+    typedef typename Input::value_type tuple_type;
 
 public:
     //! Standard stream typedef.
     typedef typename tuple_type::sixth_type value_type;
 
     //! Construction.
-    choose(Input_& in_) : in(in_)
+    choose(Input& in_) : in(in_)
     { }
 
     //! Standard stream method.
@@ -306,9 +305,7 @@ public:
 
 STXXL_END_NAMESPACE
 
-
 #include <stxxl/bits/stream/unique.h>
 
-
 #endif // !STXXL_STREAM_CHOOSE_HEADER
 // vim: et:ts=4:sw=4
diff --git a/include/stxxl/bits/stream/sort_stream.h b/include/stxxl/bits/stream/sort_stream.h
index 861a06f..8eadf2c 100644
--- a/include/stxxl/bits/stream/sort_stream.h
+++ b/include/stxxl/bits/stream/sort_stream.h
@@ -39,45 +39,52 @@ namespace stream {
 
 //! Forms sorted runs of data from a stream.
 //!
-//! \tparam Input_ type of the input stream
-//! \tparam CompareType_ type of comparison object used for sorting the runs
-//! \tparam BlockSize_ size of blocks used to store the runs (in bytes)
-//! \tparam AllocStr_ functor that defines allocation strategy for the runs
+//! \tparam Input type of the input stream
+//! \tparam CompareType type of comparison object used for sorting the runs
+//! \tparam BlockSize size of blocks used to store the runs (in bytes)
+//! \tparam AllocStr functor that defines allocation strategy for the runs
 template <
-    class Input_,
-    class CompareType_,
-    unsigned BlockSize_ = STXXL_DEFAULT_BLOCK_SIZE(typename Input_::value_type),
-    class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY>
+    class Input,
+    class CompareType,
+    unsigned BlockSize = STXXL_DEFAULT_BLOCK_SIZE(typename Input::value_type),
+    class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY>
 class basic_runs_creator : private noncopyable
 {
 public:
-    typedef Input_ input_type;
-    typedef CompareType_ cmp_type;
-    static const unsigned block_size = BlockSize_;
-    typedef AllocStr_ allocation_strategy_type;
+    typedef Input input_type;
+    typedef CompareType cmp_type;
+    static const unsigned block_size = BlockSize;
+    typedef AllocStr allocation_strategy_type;
 
 public:
-    typedef typename Input_::value_type value_type;
-    typedef typed_block<BlockSize_, value_type> block_type;
+    typedef typename Input::value_type value_type;
+    typedef typed_block<BlockSize, value_type> block_type;
     typedef sort_helper::trigger_entry<block_type> trigger_entry_type;
     typedef sorted_runs<trigger_entry_type, cmp_type> sorted_runs_data_type;
     typedef typename sorted_runs_data_type::run_type run_type;
     typedef counting_ptr<sorted_runs_data_type> sorted_runs_type;
 
+    typedef typename element_iterator_traits<block_type, external_size_type>::element_iterator element_iterator;
+
 protected:
-    Input_& m_input;                    //! reference to the input stream
-    CompareType_ m_cmp;                 //! comparator used to sort block groups
+    //! reference to the input stream
+    Input& m_input;
+    //! comparator used to sort block groups
+    CompareType m_cmp;
 
 private:
-    sorted_runs_type m_result;          //! stores the result (sorted runs) as smart pointer
-    unsigned_type m_memsize;            //! memory for internal use in blocks
-    bool m_result_computed;             //! true iff result is already computed (used in 'result()' method)
+    //! stores the result (sorted runs) as smart pointer
+    sorted_runs_type m_result;
+    //! memory for internal use in blocks
+    unsigned_type m_memsize;
+    //! true iff result is already computed (used in 'result()' method)
+    bool m_result_computed;
 
     //! Fetch data from input into blocks[first_idx,last_idx).
-    unsigned_type fetch(block_type* blocks, unsigned_type first_idx, unsigned_type last_idx)
+    unsigned_type fetch(block_type* blocks,
+                        unsigned_type first_idx, unsigned_type last_idx)
     {
-        typename element_iterator_traits<block_type>::element_iterator output =
-            make_element_iterator(blocks, first_idx);
+        element_iterator output = make_element_iterator(blocks, first_idx);
         unsigned_type curr_idx = first_idx;
         while (!m_input.empty() && curr_idx != last_idx) {
             *output = *m_input;
@@ -88,13 +95,13 @@ private:
         return curr_idx;
     }
 
-    //!  fill the rest of the block with max values
-    void fill_with_max_value(block_type* blocks, unsigned_type num_blocks, unsigned_type first_idx)
+    //! fill the rest of the block with max values
+    void fill_with_max_value(block_type* blocks, unsigned_type num_blocks,
+                             unsigned_type first_idx)
     {
         unsigned_type last_idx = num_blocks * block_type::size;
         if (first_idx < last_idx) {
-            typename element_iterator_traits<block_type>::element_iterator curr =
-                make_element_iterator(blocks, first_idx);
+            element_iterator curr = make_element_iterator(blocks, first_idx);
             while (first_idx != last_idx) {
                 *curr = m_cmp.max_value();
                 ++curr;
@@ -118,17 +125,21 @@ public:
     //! Create the object.
     //! \param input input stream
     //! \param cmp comparator object
-    //! \param memory_to_use memory amount that is allowed to used by the sorter in bytes
-    basic_runs_creator(Input_& input, CompareType_ cmp, unsigned_type memory_to_use)
+    //! \param memory_to_use memory amount that is allowed to be used by the
+    //! sorter in bytes
+    basic_runs_creator(Input& input, CompareType cmp,
+                       unsigned_type memory_to_use)
         : m_input(input),
           m_cmp(cmp),
           m_result(new sorted_runs_data_type),
-          m_memsize(memory_to_use / BlockSize_ / sort_memory_usage_factor()),
+          m_memsize(memory_to_use / BlockSize / sort_memory_usage_factor()),
           m_result_computed(false)
     {
         sort_helper::verify_sentinel_strict_weak_ordering(cmp);
-        if (!(2 * BlockSize_ * sort_memory_usage_factor() <= memory_to_use)) {
-            throw bad_parameter("stxxl::runs_creator<>:runs_creator(): INSUFFICIENT MEMORY provided, please increase parameter 'memory_to_use'");
+        if (!(2 * BlockSize * sort_memory_usage_factor() <= memory_to_use)) {
+            throw bad_parameter("stxxl::runs_creator<>:runs_creator(): "
+                                "INSUFFICIENT MEMORY provided, "
+                                "please increase parameter 'memory_to_use'");
         }
         assert(m_memsize > 0);
     }
@@ -153,8 +164,8 @@ public:
 //! Finish the results, i. e. create all runs.
 //!
 //! This is the main routine of this class.
-template <class Input_, class CompareType_, unsigned BlockSize_, class AllocStr_>
-void basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>::compute_result()
+template <class Input, class CompareType, unsigned BlockSize, class AllocStr>
+void basic_runs_creator<Input, CompareType, BlockSize, AllocStr>::compute_result()
 {
     unsigned_type i = 0;
     unsigned_type m2 = m_memsize / 2;
@@ -177,7 +188,8 @@ void basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>::compute_re
     if (blocks1_length == block_type::size && !input.empty())
     {
         Blocks1 = new block_type[m2 * 2];
-        std::copy(m_result->small_run.begin(), m_result->small_run.end(), Blocks1[0].begin());
+        std::copy(m_result->small_run.begin(), m_result->small_run.end(),
+                  Blocks1[0].begin());
         m_result->small_run.clear();
     }
     else
@@ -214,7 +226,7 @@ void basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>::compute_re
 
     unsigned_type cur_run_size = div_ceil(blocks1_length, block_type::size);      // in blocks
     run.resize(cur_run_size);
-    bm->new_blocks(AllocStr_(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
+    bm->new_blocks(AllocStr(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
 
     disk_queues::get_instance()->set_priority_op(request_queue::WRITE);
 
@@ -253,7 +265,7 @@ void basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>::compute_re
 
         cur_run_size = div_ceil(blocks2_length, block_type::size);
         run.resize(cur_run_size);
-        bm->new_blocks(AllocStr_(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
+        bm->new_blocks(AllocStr(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
 
         // fill the rest of the last block with max values
         fill_with_max_value(Blocks1, cur_run_size, blocks2_length);
@@ -295,7 +307,7 @@ void basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>::compute_re
 
     cur_run_size = div_ceil(blocks2_length, block_type::size);      // in blocks
     run.resize(cur_run_size);
-    bm->new_blocks(AllocStr_(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
+    bm->new_blocks(AllocStr(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
 
     for (i = 0; i < cur_run_size; ++i)
     {
@@ -313,7 +325,7 @@ void basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>::compute_re
         sort_run(Blocks1, blocks1_length);
         cur_run_size = div_ceil(blocks1_length, block_type::size);      // in blocks
         run.resize(cur_run_size);
-        bm->new_blocks(AllocStr_(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
+        bm->new_blocks(AllocStr(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
 
         // fill the rest of the last block with max values (occurs only on the last run)
         fill_with_max_value(Blocks1, cur_run_size, blocks1_length);
@@ -337,19 +349,20 @@ void basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>::compute_re
 
 //! Forms sorted runs of data from a stream.
 //!
-//! \tparam Input_ type of the input stream
-//! \tparam CompareType_ type of omparison object used for sorting the runs
-//! \tparam BlockSize_ size of blocks used to store the runs
-//! \tparam AllocStr_ functor that defines allocation strategy for the runs
+//! \tparam Input type of the input stream
+//! \tparam CompareType type of comparison object used for sorting the runs
+//! \tparam BlockSize size of blocks used to store the runs
+//! \tparam AllocStr functor that defines allocation strategy for the runs
 template <
-    class Input_,
-    class CompareType_,
-    unsigned BlockSize_ = STXXL_DEFAULT_BLOCK_SIZE(typename Input_::value_type),
-    class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY>
-class runs_creator : public basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_>
+    class Input,
+    class CompareType,
+    unsigned BlockSize = STXXL_DEFAULT_BLOCK_SIZE(typename Input::value_type),
+    class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY
+    >
+class runs_creator : public basic_runs_creator<Input, CompareType, BlockSize, AllocStr>
 {
 private:
-    typedef basic_runs_creator<Input_, CompareType_, BlockSize_, AllocStr_> base;
+    typedef basic_runs_creator<Input, CompareType, BlockSize, AllocStr> base;
 
 public:
     typedef typename base::cmp_type cmp_type;
@@ -362,13 +375,13 @@ public:
     //! Creates the object.
     //! \param input input stream
     //! \param cmp comparator object
-    //! \param memory_to_use memory amount that is allowed to used by the sorter in bytes
-    runs_creator(Input_& input, CompareType_ cmp, unsigned_type memory_to_use)
+    //! \param memory_to_use memory amount that is allowed to be used by the
+    //! sorter in bytes
+    runs_creator(Input& input, CompareType cmp, unsigned_type memory_to_use)
         : base(input, cmp, memory_to_use)
     { }
 };
 
-
 //! Input strategy for \c runs_creator class.
 //!
 //! This strategy together with \c runs_creator class
@@ -376,10 +389,10 @@ public:
 //! data structure usable for \c runs_merger
 //! pushing elements into the sorter
 //! (using runs_creator::push())
-template <class ValueType_>
+template <class ValueType>
 struct use_push
 {
-    typedef ValueType_ value_type;
+    typedef ValueType value_type;
 };
 
 //! Forms sorted runs of elements passed in push() method.
@@ -388,34 +401,37 @@ struct use_push
 //! allows to create sorted runs
 //! data structure usable for \c runs_merger from
 //! elements passed in sorted push() method. <BR>
-//! \tparam ValueType_ type of values (parameter for \c use_push strategy)
-//! \tparam CompareType_ type of comparison object used for sorting the runs
-//! \tparam BlockSize_ size of blocks used to store the runs
-//! \tparam AllocStr_ functor that defines allocation strategy for the runs
+//! \tparam ValueType type of values (parameter for \c use_push strategy)
+//! \tparam CompareType type of comparison object used for sorting the runs
+//! \tparam BlockSize size of blocks used to store the runs
+//! \tparam AllocStr functor that defines allocation strategy for the runs
 template <
-    class ValueType_,
-    class CompareType_,
-    unsigned BlockSize_,
-    class AllocStr_>
+    class ValueType,
+    class CompareType,
+    unsigned BlockSize,
+    class AllocStr
+    >
 class runs_creator<
-    use_push<ValueType_>,
-    CompareType_,
-    BlockSize_,
-    AllocStr_>
-    : private noncopyable
+    use_push<ValueType>,
+    CompareType,
+    BlockSize,
+    AllocStr
+    >: private noncopyable
 {
 public:
-    typedef CompareType_ cmp_type;
-    typedef ValueType_ value_type;
-    typedef typed_block<BlockSize_, value_type> block_type;
+    typedef CompareType cmp_type;
+    typedef ValueType value_type;
+    typedef typed_block<BlockSize, value_type> block_type;
     typedef sort_helper::trigger_entry<block_type> trigger_entry_type;
     typedef sorted_runs<trigger_entry_type, cmp_type> sorted_runs_data_type;
     typedef counting_ptr<sorted_runs_data_type> sorted_runs_type;
     typedef sorted_runs_type result_type;
 
+    typedef typename element_iterator_traits<block_type, external_size_type>::element_iterator element_iterator;
+
 private:
     //! comparator object to sort runs
-    CompareType_ m_cmp;
+    CompareType m_cmp;
 
     typedef typename sorted_runs_data_type::run_type run_type;
 
@@ -438,7 +454,7 @@ private:
     const unsigned_type m_el_in_run;
 
     //! current number of elements in the run m_blocks1
-    unsigned_type m_cur_el;
+    internal_size_type m_cur_el;
 
     //! accumulation buffer of size m_m2 blocks, half the available memory size
     block_type* m_blocks1;
@@ -446,7 +462,8 @@ private:
     //! accumulation buffer that is currently being written to disk
     block_type* m_blocks2;
 
-    //! reference to write requests transporting the last accumulation buffer to disk
+    //! reference to write requests transporting the last accumulation buffer
+    //! to disk
     request_ptr* m_write_reqs;
 
     //! run object containing block ids of the run being written to disk
@@ -454,12 +471,12 @@ private:
 
 protected:
     //!  fill the rest of the block with max values
-    void fill_with_max_value(block_type* blocks, unsigned_type num_blocks, unsigned_type first_idx)
+    void fill_with_max_value(block_type* blocks, unsigned_type num_blocks,
+                             unsigned_type first_idx)
     {
         unsigned_type last_idx = num_blocks * block_type::size;
         if (first_idx < last_idx) {
-            typename element_iterator_traits<block_type>::element_iterator curr =
-                make_element_iterator(blocks, first_idx);
+            element_iterator curr = make_element_iterator(blocks, first_idx);
             while (first_idx != last_idx) {
                 *curr = m_cmp.max_value();
                 ++curr;
@@ -496,7 +513,7 @@ protected:
         const unsigned_type cur_run_size = div_ceil(m_cur_el, block_type::size);         // in blocks
         run.resize(cur_run_size);
         block_manager* bm = block_manager::get_instance();
-        bm->new_blocks(AllocStr_(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
+        bm->new_blocks(AllocStr(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
 
         disk_queues::get_instance()->set_priority_op(request_queue::WRITE);
 
@@ -525,18 +542,20 @@ public:
     //! Creates the object.
     //! \param cmp comparator object
     //! \param memory_to_use memory amount that is allowed to used by the sorter in bytes
-    runs_creator(CompareType_ cmp, unsigned_type memory_to_use) :
-        m_cmp(cmp),
-        m_memory_to_use(memory_to_use),
-        m_memsize(memory_to_use / BlockSize_ / sort_memory_usage_factor()),
-        m_m2(m_memsize / 2),
-        m_el_in_run(m_m2 * block_type::size),
-        m_blocks1(NULL), m_blocks2(NULL),
-        m_write_reqs(NULL)
+    runs_creator(CompareType cmp, unsigned_type memory_to_use)
+        : m_cmp(cmp),
+          m_memory_to_use(memory_to_use),
+          m_memsize(memory_to_use / BlockSize / sort_memory_usage_factor()),
+          m_m2(m_memsize / 2),
+          m_el_in_run(m_m2 * block_type::size),
+          m_blocks1(NULL), m_blocks2(NULL),
+          m_write_reqs(NULL)
     {
         sort_helper::verify_sentinel_strict_weak_ordering(m_cmp);
-        if (!(2 * BlockSize_ * sort_memory_usage_factor() <= m_memory_to_use)) {
-            throw bad_parameter("stxxl::runs_creator<>:runs_creator(): INSUFFICIENT MEMORY provided, please increase parameter 'memory_to_use'");
+        if (!(2 * BlockSize * sort_memory_usage_factor() <= m_memory_to_use)) {
+            throw bad_parameter("stxxl::runs_creator<>:runs_creator(): "
+                                "INSUFFICIENT MEMORY provided, "
+                                "please increase parameter 'memory_to_use'");
         }
         assert(m_m2 > 0);
 
@@ -617,7 +636,7 @@ public:
         const unsigned_type cur_run_blocks = div_ceil(m_el_in_run, block_type::size);        // in blocks
         run.resize(cur_run_blocks);
         block_manager* bm = block_manager::get_instance();
-        bm->new_blocks(AllocStr_(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
+        bm->new_blocks(AllocStr(), make_bid_iterator(run.begin()), make_bid_iterator(run.end()));
 
         disk_queues::get_instance()->set_priority_op(request_queue::WRITE);
 
@@ -654,7 +673,7 @@ public:
     }
 
     //! number of items currently inserted.
-    unsigned_type size() const
+    external_size_type size() const
     {
         return m_result->elements + m_cur_el;
     }
@@ -672,17 +691,16 @@ public:
     }
 };
 
-
 //! Input strategy for \c runs_creator class.
 //!
 //! This strategy together with \c runs_creator class
 //! allows to create sorted runs
 //! data structure usable for \c runs_merger from
 //! sequences of elements in sorted order
-template <class ValueType_>
+template <class ValueType>
 struct from_sorted_sequences
 {
-    typedef ValueType_ value_type;
+    typedef ValueType value_type;
 };
 
 //! Forms sorted runs of data taking elements in sorted order (element by element).
@@ -691,30 +709,31 @@ struct from_sorted_sequences
 //! allows to create sorted runs
 //! data structure usable for \c runs_merger from
 //! sequences of elements in sorted order. <BR>
-//! \tparam ValueType_ type of values (parameter for \c from_sorted_sequences strategy)
-//! \tparam CompareType_ type of comparison object used for sorting the runs
-//! \tparam BlockSize_ size of blocks used to store the runs
-//! \tparam AllocStr_ functor that defines allocation strategy for the runs
+//! \tparam ValueType type of values (parameter for \c from_sorted_sequences strategy)
+//! \tparam CompareType type of comparison object used for sorting the runs
+//! \tparam BlockSize size of blocks used to store the runs
+//! \tparam AllocStr functor that defines allocation strategy for the runs
 template <
-    class ValueType_,
-    class CompareType_,
-    unsigned BlockSize_,
-    class AllocStr_>
+    class ValueType,
+    class CompareType,
+    unsigned BlockSize,
+    class AllocStr
+    >
 class runs_creator<
-    from_sorted_sequences<ValueType_>,
-    CompareType_,
-    BlockSize_,
-    AllocStr_>
-    : private noncopyable
+    from_sorted_sequences<ValueType>,
+    CompareType,
+    BlockSize,
+    AllocStr
+    >: private noncopyable
 {
 public:
-    typedef ValueType_ value_type;
-    typedef typed_block<BlockSize_, value_type> block_type;
+    typedef ValueType value_type;
+    typedef typed_block<BlockSize, value_type> block_type;
     typedef sort_helper::trigger_entry<block_type> trigger_entry_type;
-    typedef AllocStr_ alloc_strategy_type;
+    typedef AllocStr alloc_strategy_type;
 
 public:
-    typedef CompareType_ cmp_type;
+    typedef CompareType cmp_type;
     typedef sorted_runs<trigger_entry_type, cmp_type> sorted_runs_data_type;
     typedef counting_ptr<sorted_runs_data_type> sorted_runs_type;
     typedef sorted_runs_type result_type;
@@ -722,36 +741,41 @@ public:
 private:
     typedef typename sorted_runs_data_type::run_type run_type;
 
-    CompareType_ cmp;
+    CompareType cmp;
 
-    sorted_runs_type result_;     // stores the result (sorted runs)
-    unsigned_type m_;             // memory for internal use in blocks
+    //! stores the result (sorted runs)
+    sorted_runs_type result_;
+    //! memory for internal use in blocks
+    unsigned_type m_;
     buffered_writer<block_type> writer;
     block_type* cur_block;
     unsigned_type offset;
     unsigned_type iblock;
     unsigned_type irun;
-    alloc_strategy_type alloc_strategy;      // needs to be reset after each run
+    //! needs to be reset after each run
+    alloc_strategy_type alloc_strategy;
 
 public:
     //! Creates the object.
     //! \param c comparator object
     //! \param memory_to_use memory amount that is allowed to used by the sorter in bytes.
     //! Recommended value: 2 * block_size * D
-    runs_creator(CompareType_ c, unsigned_type memory_to_use) :
-        cmp(c),
-        result_(new sorted_runs_data_type),
-        m_(memory_to_use / BlockSize_ / sort_memory_usage_factor()),
-        writer(m_, m_ / 2),
-        cur_block(writer.get_free_block()),
-        offset(0),
-        iblock(0),
-        irun(0)
+    runs_creator(CompareType c, unsigned_type memory_to_use)
+        : cmp(c),
+          result_(new sorted_runs_data_type),
+          m_(memory_to_use / BlockSize / sort_memory_usage_factor()),
+          writer(m_, m_ / 2),
+          cur_block(writer.get_free_block()),
+          offset(0),
+          iblock(0),
+          irun(0)
     {
         sort_helper::verify_sentinel_strict_weak_ordering(cmp);
         assert(m_ > 0);
-        if (!(2 * BlockSize_ * sort_memory_usage_factor() <= memory_to_use)) {
-            throw bad_parameter("stxxl::runs_creator<>:runs_creator(): INSUFFICIENT MEMORY provided, please increase parameter 'memory_to_use'");
+        if (!(2 * BlockSize * sort_memory_usage_factor() <= memory_to_use)) {
+            throw bad_parameter("stxxl::runs_creator<>:runs_creator(): "
+                                "INSUFFICIENT MEMORY provided, "
+                                "please increase parameter 'memory_to_use'");
         }
     }
 
@@ -795,7 +819,6 @@ public:
         if (offset == 0 && iblock == 0)     // current run is empty
             return;
 
-
         result_->runs_sizes.resize(irun + 1);
         result_->runs_sizes.back() = iblock * block_type::size + offset;
 
@@ -842,16 +865,15 @@ public:
     }
 };
 
-
 //! Checker for the sorted runs object created by the \c runs_creator .
 //! \param sruns sorted runs object
 //! \param cmp comparison object used for checking the order of elements in runs
 //! \return \c true if runs are sorted, \c false otherwise
-template <class RunsType_, class CompareType_>
-bool check_sorted_runs(const RunsType_& sruns, CompareType_ cmp)
+template <class RunsType, class CompareType>
+bool check_sorted_runs(const RunsType& sruns, CompareType cmp)
 {
     sort_helper::verify_sentinel_strict_weak_ordering(cmp);
-    typedef typename RunsType_::element_type::block_type block_type;
+    typedef typename RunsType::element_type::block_type block_type;
     STXXL_VERBOSE2("Elements: " << sruns->elements);
     unsigned_type nruns = sruns->runs.size();
     STXXL_VERBOSE2("Runs: " << nruns);
@@ -878,9 +900,10 @@ bool check_sorted_runs(const RunsType_& sruns, CompareType_ cmp)
                 return false;
             }
         }
-        if (!stxxl::is_sorted(make_element_iterator(blocks, 0),
-                              make_element_iterator(blocks, sruns->runs_sizes[irun]),
-                              cmp))
+        if (!stxxl::is_sorted(
+                make_element_iterator(blocks, 0),
+                make_element_iterator(blocks, sruns->runs_sizes[irun]),
+                cmp))
         {
             STXXL_ERRMSG("check_sorted_runs  wrong order in the run");
             delete[] blocks;
@@ -895,26 +918,25 @@ bool check_sorted_runs(const RunsType_& sruns, CompareType_ cmp)
     return true;
 }
 
-
 ////////////////////////////////////////////////////////////////////////
 //     MERGE RUNS                                                     //
 ////////////////////////////////////////////////////////////////////////
 
 //! Merges sorted runs.
 //!
-//! \tparam RunsType_ type of the sorted runs, available as \c runs_creator::sorted_runs_type ,
-//! \tparam CompareType_ type of comparison object used for merging
-//! \tparam AllocStr_ allocation strategy used to allocate the blocks for
+//! \tparam RunsType type of the sorted runs, available as \c runs_creator::sorted_runs_type ,
+//! \tparam CompareType type of comparison object used for merging
+//! \tparam AllocStr allocation strategy used to allocate the blocks for
 //! storing intermediate results if several merge passes are required
-template <class RunsType_,
-          class CompareType_,
-          class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY>
+template <class RunsType,
+          class CompareType,
+          class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY>
 class basic_runs_merger : private noncopyable
 {
 public:
-    typedef RunsType_ sorted_runs_type;
-    typedef CompareType_ value_cmp;
-    typedef AllocStr_ alloc_strategy;
+    typedef RunsType sorted_runs_type;
+    typedef CompareType value_cmp;
+    typedef AllocStr alloc_strategy;
 
     typedef typename sorted_runs_type::element_type sorted_runs_data_type;
     typedef typename sorted_runs_data_type::size_type size_type;
@@ -1130,7 +1152,10 @@ public:
 
         int_type disks_number = config::get_instance()->disks_number();
         unsigned_type min_prefetch_buffers = 2 * disks_number;
-        unsigned_type input_buffers = (m_memory_to_use > sizeof(out_block_type) ? m_memory_to_use - sizeof(out_block_type) : 0) / block_type::raw_size;
+        unsigned_type input_buffers =
+            (m_memory_to_use > sizeof(out_block_type)
+             ? m_memory_to_use - sizeof(out_block_type)
+             : 0) / block_type::raw_size;
         unsigned_type nruns = m_sruns->runs.size();
 
         if (input_buffers < nruns + min_prefetch_buffers)
@@ -1194,7 +1219,7 @@ public:
             m_consume_seq,
             m_prefetch_seq,
             n_opt_prefetch_buffers,
-            disks_number);
+            config::get_instance()->get_max_device_id());
 #else
         for (unsigned_type i = 0; i < prefetch_seq_size; ++i)
             m_prefetch_seq[i] = i;
@@ -1305,16 +1330,16 @@ public:
     }
 };
 
-
-template <class RunsType_, class CompareType_, class AllocStr_>
-void basic_runs_merger<RunsType_, CompareType_, AllocStr_>::merge_recursively()
+template <class RunsType, class CompareType, class AllocStr>
+void basic_runs_merger<RunsType, CompareType, AllocStr>::merge_recursively()
 {
     block_manager* bm = block_manager::get_instance();
     unsigned_type ndisks = config::get_instance()->disks_number();
     unsigned_type nwrite_buffers = 2 * ndisks;
     unsigned_type memory_for_write_buffers = nwrite_buffers * sizeof(block_type);
 
-    // memory consumption of the recursive merger (uses block_type as out_block_type)
+    // memory consumption of the recursive merger (uses block_type as
+    // out_block_type)
     unsigned_type recursive_merger_memory_prefetch_buffers = 2 * ndisks * sizeof(block_type);
     unsigned_type recursive_merger_memory_out_block = sizeof(block_type);
     unsigned_type memory_for_buffers = memory_for_write_buffers
@@ -1335,7 +1360,8 @@ void basic_runs_merger<RunsType_, CompareType_, AllocStr_>::merge_recursively()
                   " opt_merge_factor: " << merge_factor <<
                   " max_arity: " << max_arity << " new_nruns: " << new_nruns);
 
-        // construct new sorted_runs data object which will be swapped into m_sruns
+        // construct new sorted_runs data object which will be swapped into
+        // m_sruns
 
         sorted_runs_data_type new_runs;
         new_runs.runs.resize(new_nruns);
@@ -1364,7 +1390,7 @@ void basic_runs_merger<RunsType_, CompareType_, AllocStr_>::merge_recursively()
                 new_runs.runs_sizes[cur_out_run] = elements_in_new_run;
 
                 // calculate blocks in run
-                const unsigned_type blocks_in_new_run = div_ceil(elements_in_new_run, block_type::size);
+                const unsigned_type blocks_in_new_run = (unsigned_type)div_ceil(elements_in_new_run, block_type::size);
 
                 // allocate blocks for the new runs
                 new_runs.runs[cur_out_run].resize(blocks_in_new_run);
@@ -1389,7 +1415,8 @@ void basic_runs_merger<RunsType_, CompareType_, AllocStr_>::merge_recursively()
 
                 // construct recursive merger
 
-                basic_runs_merger<RunsType_, CompareType_, AllocStr_> merger(m_cmp, m_memory_to_use - memory_for_write_buffers);
+                basic_runs_merger<RunsType, CompareType, AllocStr>
+                merger(m_cmp, m_memory_to_use - memory_for_write_buffers);
                 merger.initialize(cur_runs);
 
                 {       // make sure everything is being destroyed in right time
@@ -1404,7 +1431,7 @@ void basic_runs_merger<RunsType_, CompareType_, AllocStr_>::merge_recursively()
                     {
                         *out = *merger;
                         if ((cnt % block_type::size) == 0)     // have to write the trigger value
-                            new_runs.runs[cur_out_run][cnt / size_type(block_type::size)].value = *merger;
+                            new_runs.runs[cur_out_run][(unsigned_type)(cnt / size_type(block_type::size))].value = *merger;
 
                         ++cnt, ++out, ++merger;
                     }
@@ -1436,30 +1463,33 @@ void basic_runs_merger<RunsType_, CompareType_, AllocStr_>::merge_recursively()
 
         assert(elements_left == 0);
 
-        m_sruns->runs.clear();   // clear bid vector of m_sruns to skip deallocation of blocks in destructor
+        // clear bid vector of m_sruns to skip deallocation of blocks in
+        // destructor
+        m_sruns->runs.clear();
 
+        // swap in the new runs, replacing the data in the reference-counted
+        // object m_sruns; this ends the body of while (nruns > max_arity)
         std::swap(nruns, new_nruns);
-        m_sruns->swap(new_runs); // replaces data in referenced counted object m_sruns
-    }                            // end while (nruns > max_arity)
+        m_sruns->swap(new_runs);
+    }
 }
 
-
 //! Merges sorted runs.
 //!
-//! \tparam RunsType_ type of the sorted runs, available as \c runs_creator::sorted_runs_type ,
-//! \tparam CompareType_ type of comparison object used for merging
-//! \tparam AllocStr_ allocation strategy used to allocate the blocks for
+//! \tparam RunsType type of the sorted runs, available as \c runs_creator::sorted_runs_type ,
+//! \tparam CompareType type of comparison object used for merging
+//! \tparam AllocStr allocation strategy used to allocate the blocks for
 //! storing intermediate results if several merge passes are required
-template <class RunsType_,
-          class CompareType_ = typename RunsType_::element_type::cmp_type,
-          class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY>
-class runs_merger : public basic_runs_merger<RunsType_, CompareType_, AllocStr_>
+template <class RunsType,
+          class CompareType = typename RunsType::element_type::cmp_type,
+          class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY>
+class runs_merger : public basic_runs_merger<RunsType, CompareType, AllocStr>
 {
 protected:
-    typedef basic_runs_merger<RunsType_, CompareType_, AllocStr_> base;
+    typedef basic_runs_merger<RunsType, CompareType, AllocStr> base;
 
 public:
-    typedef RunsType_ sorted_runs_type;
+    typedef RunsType sorted_runs_type;
     typedef typename base::value_cmp value_cmp;
     typedef typename base::value_cmp cmp_type;
     typedef typename base::block_type block_type;
@@ -1469,7 +1499,8 @@ public:
     //! \param sruns input sorted runs object
     //! \param cmp comparison object
     //! \param memory_to_use amount of memory available for the merger in bytes
-    runs_merger(sorted_runs_type& sruns, value_cmp cmp, unsigned_type memory_to_use)
+    runs_merger(sorted_runs_type& sruns, value_cmp cmp,
+                unsigned_type memory_to_use)
         : base(cmp, memory_to_use)
     {
         this->initialize(sruns);
@@ -1483,42 +1514,44 @@ public:
     { }
 };
 
-
 ////////////////////////////////////////////////////////////////////////
 //     SORT                                                           //
 ////////////////////////////////////////////////////////////////////////
 
 //! Produces sorted stream from input stream.
 //!
-//! \tparam Input_ type of the input stream
-//! \tparam CompareType_ type of comparison object used for sorting the runs
-//! \tparam BlockSize_ size of blocks used to store the runs
-//! \tparam AllocStr_ functor that defines allocation strategy for the runs
+//! \tparam Input type of the input stream
+//! \tparam CompareType type of comparison object used for sorting the runs
+//! \tparam BlockSize size of blocks used to store the runs
+//! \tparam AllocStr functor that defines allocation strategy for the runs
 //! \remark Implemented as the composition of \c runs_creator and \c runs_merger .
-template <class Input_,
-          class CompareType_,
-          unsigned BlockSize_ = STXXL_DEFAULT_BLOCK_SIZE(typename Input_::value_type),
-          class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY,
-          class runs_creator_type = runs_creator<Input_, CompareType_, BlockSize_, AllocStr_> >
+template <
+    class Input,
+    class CompareType,
+    unsigned BlockSize = STXXL_DEFAULT_BLOCK_SIZE(typename Input::value_type),
+    class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY,
+    class RunsCreatorType = runs_creator<Input, CompareType, BlockSize, AllocStr>
+    >
 class sort : public noncopyable
 {
+    typedef RunsCreatorType runs_creator_type;
     typedef typename runs_creator_type::sorted_runs_type sorted_runs_type;
-    typedef runs_merger<sorted_runs_type, CompareType_, AllocStr_> runs_merger_type;
+    typedef runs_merger<sorted_runs_type, CompareType, AllocStr> runs_merger_type;
 
     runs_creator_type creator;
     runs_merger_type merger;
 
 public:
     //! Standard stream typedef.
-    typedef typename Input_::value_type value_type;
+    typedef typename Input::value_type value_type;
 
     //! Creates the object.
     //! \param in input stream
     //! \param c comparator object
     //! \param memory_to_use memory amount that is allowed to used by the sorter in bytes
-    sort(Input_& in, CompareType_ c, unsigned_type memory_to_use) :
-        creator(in, c, memory_to_use),
-        merger(creator.result(), c, memory_to_use)
+    sort(Input& in, CompareType c, unsigned_type memory_to_use)
+        : creator(in, c, memory_to_use),
+          merger(creator.result(), c, memory_to_use)
     {
         sort_helper::verify_sentinel_strict_weak_ordering(c);
     }
@@ -1528,14 +1561,14 @@ public:
     //! \param c comparator object
     //! \param m_memory_to_userc memory amount that is allowed to used by the runs creator in bytes
     //! \param m_memory_to_use memory amount that is allowed to used by the merger in bytes
-    sort(Input_& in, CompareType_ c, unsigned_type m_memory_to_userc, unsigned_type m_memory_to_use) :
-        creator(in, c, m_memory_to_userc),
-        merger(creator.result(), c, m_memory_to_use)
+    sort(Input& in, CompareType c, unsigned_type m_memory_to_userc,
+         unsigned_type m_memory_to_use)
+        : creator(in, c, m_memory_to_userc),
+          merger(creator.result(), c, m_memory_to_use)
     {
         sort_helper::verify_sentinel_strict_weak_ordering(c);
     }
 
-
     //! Standard stream method.
     bool empty() const
     {
@@ -1565,15 +1598,16 @@ public:
 
 //! Computes sorted runs type from value type and block size.
 //!
-//! \tparam ValueType_ type of values ins sorted runs
-//! \tparam BlockSize_ size of blocks where sorted runs stored
+//! \tparam ValueType type of values in sorted runs
+//! \tparam BlockSize size of blocks where sorted runs are stored
 template <
-    class ValueType_,
-    unsigned BlockSize_>
+    class ValueType,
+    unsigned BlockSize
+    >
 class compute_sorted_runs_type
 {
-    typedef ValueType_ value_type;
-    typedef BID<BlockSize_> bid_type;
+    typedef ValueType value_type;
+    typedef BID<BlockSize> bid_type;
     typedef sort_helper::trigger_entry<bid_type, value_type> trigger_entry_type;
 
 public:
@@ -1597,10 +1631,12 @@ public:
 //!
 //! The \c BlockSize template parameter defines the block size to use (in bytes)
 //! \warning Slower than External Iterator Sort
-template <unsigned BlockSize,
-          class RandomAccessIterator,
-          class CmpType,
-          class AllocStr>
+template <
+    unsigned BlockSize,
+    class RandomAccessIterator,
+    class CmpType,
+    class AllocStr
+    >
 void sort(RandomAccessIterator begin,
           RandomAccessIterator end,
           CmpType cmp,
@@ -1608,11 +1644,7 @@ void sort(RandomAccessIterator begin,
           AllocStr AS)
 {
     STXXL_UNUSED(AS);
-#if STXXL_MSVC
-    typedef typename streamify_traits<RandomAccessIterator>::stream_type InputType;
-#else
-    typedef __typeof__ (stream::streamify(begin, end)) InputType;
-#endif // STXXL_MSVC
+    typedef typename stream::streamify_traits<RandomAccessIterator>::stream_type InputType;
     InputType Input(begin, end);
     typedef stream::sort<InputType, CmpType, BlockSize, AllocStr> sorter_type;
     sorter_type Sort(Input, cmp, MemSize);
diff --git a/include/stxxl/bits/stream/sorted_runs.h b/include/stxxl/bits/stream/sorted_runs.h
index b7ebacf..faa6be7 100644
--- a/include/stxxl/bits/stream/sorted_runs.h
+++ b/include/stxxl/bits/stream/sorted_runs.h
@@ -21,7 +21,6 @@
 #include <stxxl/bits/algo/adaptor.h>
 #include <stxxl/bits/common/counting_ptr.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace stream {
@@ -29,7 +28,6 @@ namespace stream {
 //! \addtogroup streampack Stream Package
 //! \{
 
-
 ////////////////////////////////////////////////////////////////////////
 //     SORTED RUNS                                                    //
 ////////////////////////////////////////////////////////////////////////
@@ -40,7 +38,8 @@ struct sorted_runs : private noncopyable, public counted_object
 {
     typedef TriggerEntryType trigger_entry_type;
     typedef typename trigger_entry_type::block_type block_type;
-    typedef typename block_type::value_type value_type;      // may differ from trigger_entry_type::value_type
+    //! may differ from trigger_entry_type::value_type
+    typedef typename block_type::value_type value_type;
     typedef std::vector<trigger_entry_type> run_type;
     typedef std::vector<value_type> small_run_type;
     typedef stxxl::external_size_type size_type;
@@ -119,7 +118,6 @@ private:
     }
 };
 
-
 //! \}
 
 } // namespace stream
diff --git a/include/stxxl/bits/stream/stream.h b/include/stxxl/bits/stream/stream.h
index a136b16..919ad46 100644
--- a/include/stxxl/bits/stream/stream.h
+++ b/include/stxxl/bits/stream/stream.h
@@ -23,43 +23,18 @@
 #include <stxxl/vector>
 #include <stxxl/bits/compat/unique_ptr.h>
 
-
 #ifndef STXXL_VERBOSE_MATERIALIZE
 #define STXXL_VERBOSE_MATERIALIZE STXXL_VERBOSE3
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! Stream package subnamespace.
 namespace stream {
 
-//! \defgroup streampack Stream Package
-//! Package that enables pipelining of consequent sorts
-//! and scans of the external data avoiding the saving the intermediate
-//! results on the disk, e.g. the output of a sort can be directly
-//! fed into a scan procedure without the need to save it on a disk.
-//! All components of the package are contained in the \c stxxl::stream
-//! namespace.
-//!
-//!    STREAM ALGORITHM CONCEPT (Do not confuse with C++ input/output streams)
-//!
-//! \verbatim
-//!
-//!    struct stream_algorithm // stream, pipe, whatever
-//!    {
-//!      typedef some_type value_type;
-//!
-//!      const value_type & operator * () const; // return current element of the stream
-//!      stream_algorithm & operator ++ ();      // go to next element. precondition: empty() == false
-//!      bool empty() const;                     // return true if end of stream is reached
-//!
-//!    };
-//! \endverbatim
-//!
+//! \addtogroup streampack
 //! \{
 
-
 ////////////////////////////////////////////////////////////////////////
 //     STREAMIFY                                                      //
 ////////////////////////////////////////////////////////////////////////
@@ -67,75 +42,77 @@ namespace stream {
 //! A model of stream that retrieves the data from an input iterator.
 //! For convenience use \c streamify function instead of direct instantiation
 //! of \c iterator2stream .
-template <class InputIterator_>
+template <class InputIterator>
 class iterator2stream
 {
-    InputIterator_ current_, end_;
+    InputIterator m_current, m_end;
 
 public:
     //! Standard stream typedef.
-    typedef typename std::iterator_traits<InputIterator_>::value_type value_type;
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
 
-    iterator2stream(InputIterator_ begin, InputIterator_ end) :
-        current_(begin), end_(end) { }
+    iterator2stream(InputIterator begin, InputIterator end)
+        : m_current(begin), m_end(end)
+    { }
 
-    iterator2stream(const iterator2stream& a) : current_(a.current_), end_(a.end_) { }
+    iterator2stream(const iterator2stream& a)
+        : m_current(a.m_current), m_end(a.m_end)
+    { }
 
     //! Standard stream method.
     const value_type& operator * () const
     {
-        return *current_;
+        return *m_current;
     }
 
     const value_type* operator -> () const
     {
-        return &(*current_);
+        return &(*m_current);
     }
 
     //! Standard stream method.
     iterator2stream& operator ++ ()
     {
-        assert(end_ != current_);
-        ++current_;
+        assert(m_end != m_current);
+        ++m_current;
         return *this;
     }
 
     //! Standard stream method.
     bool empty() const
     {
-        return (current_ == end_);
+        return (m_current == m_end);
     }
 };
 
-
 //! Input iterator range to stream converter.
 //! \param begin iterator, pointing to the first value
 //! \param end iterator, pointing to the last + 1 position, i.e. beyond the range
 //! \return an instance of a stream object
-template <class InputIterator_>
-iterator2stream<InputIterator_> streamify(InputIterator_ begin, InputIterator_ end)
+template <class InputIterator>
+iterator2stream<InputIterator> streamify(InputIterator begin, InputIterator end)
 {
-    return iterator2stream<InputIterator_>(begin, end);
+    return iterator2stream<InputIterator>(begin, end);
 }
 
 //! Traits class of \c streamify function.
-template <class InputIterator_>
+template <class InputIterator>
 struct streamify_traits
 {
-    //! return type (stream type) of \c streamify for \c InputIterator_.
-    typedef iterator2stream<InputIterator_> stream_type;
+    //! return type (stream type) of \c streamify for \c InputIterator.
+    typedef iterator2stream<InputIterator> stream_type;
 };
 
-//! A model of stream that retrieves data from an external \c stxxl::vector iterator.
-//! It is more efficient than generic \c iterator2stream thanks to use of overlapping
-//! For convenience use \c streamify function instead of direct instantiation
-//! of \c vector_iterator2stream .
-template <class InputIterator_>
+//! A model of stream that retrieves data from an external \c stxxl::vector
+//! iterator.  It is more efficient than generic \c iterator2stream thanks to
+//! use of overlapping. For convenience use \c streamify function instead of
+//! direct instantiation of \c vector_iterator2stream .
+template <class InputIterator>
 class vector_iterator2stream
 {
-    InputIterator_ current_, end_;
-    typedef buf_istream<typename InputIterator_::block_type,
-                        typename InputIterator_::bids_container_iterator> buf_istream_type;
+    InputIterator m_current, m_end;
+    typedef buf_istream<typename InputIterator::block_type,
+                        typename InputIterator::bids_container_iterator> buf_istream_type;
 
     typedef typename stxxl::compat_unique_ptr<buf_istream_type>::result buf_istream_unique_ptr_type;
     mutable buf_istream_unique_ptr_type in;
@@ -146,26 +123,32 @@ class vector_iterator2stream
     }
 
 public:
-    typedef vector_iterator2stream<InputIterator_> Self_;
+    typedef vector_iterator2stream<InputIterator> self_type;
 
     //! Standard stream typedef.
-    typedef typename std::iterator_traits<InputIterator_>::value_type value_type;
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
 
-    vector_iterator2stream(InputIterator_ begin, InputIterator_ end, unsigned_type nbuffers = 0) :
-        current_(begin), end_(end), in(static_cast<buf_istream_type*>(NULL))
+    vector_iterator2stream(InputIterator begin, InputIterator end,
+                           unsigned_type nbuffers = 0)
+        : m_current(begin), m_end(end),
+          in(static_cast<buf_istream_type*>(NULL))
     {
         if (empty())
             return;
 
         begin.flush();         // flush container
-        typename InputIterator_::bids_container_iterator end_iter = end.bid() + ((end.block_offset()) ? 1 : 0);
+        typename InputIterator::bids_container_iterator end_iter
+            = end.bid() + ((end.block_offset()) ? 1 : 0);
 
         if (end_iter - begin.bid() > 0)
         {
-            in.reset(new buf_istream_type(begin.bid(), end_iter, nbuffers ? nbuffers :
-                                          (2 * config::get_instance()->disks_number())));
+            in.reset(new buf_istream_type(
+                         begin.bid(), end_iter, nbuffers ? nbuffers :
+                         (2 * config::get_instance()->disks_number())
+                         )
+                     );
 
-            InputIterator_ cur = begin - begin.block_offset();
+            InputIterator cur = begin - begin.block_offset();
 
             // skip the beginning of the block
             for ( ; cur != begin; ++cur)
@@ -173,8 +156,9 @@ public:
         }
     }
 
-    vector_iterator2stream(const Self_& a) :
-        current_(a.current_), end_(a.end_), in(a.in.release()) { }
+    vector_iterator2stream(const self_type& a)
+        : m_current(a.m_current), m_end(a.m_end), in(a.in.release())
+    { }
 
     //! Standard stream method.
     const value_type& operator * () const
@@ -188,10 +172,10 @@ public:
     }
 
     //! Standard stream method.
-    Self_& operator ++ ()
+    self_type& operator ++ ()
     {
-        assert(end_ != current_);
-        ++current_;
+        assert(m_end != m_current);
+        ++m_current;
         ++(*in);
         if (UNLIKELY(empty()))
             delete_stream();
@@ -202,7 +186,7 @@ public:
     //! Standard stream method.
     bool empty() const
     {
-        return (current_ == end_);
+        return (m_current == m_end);
     }
     virtual ~vector_iterator2stream()
     {
@@ -218,24 +202,39 @@ public:
 //! which equals to (2 * number_of_disks)
 //! \return an instance of a stream object
 
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_>
-vector_iterator2stream<stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize>
+vector_iterator2stream<
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                           DiffType, BlockSize, PagerType, PageSize>
+    >
 streamify(
-    stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> begin,
-    stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> end,
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                           DiffType, BlockSize, PagerType, PageSize> begin,
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                           DiffType, BlockSize, PagerType, PageSize> end,
     unsigned_type nbuffers = 0)
 {
     STXXL_VERBOSE1("streamify for vector_iterator range is called");
-    return vector_iterator2stream<stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
-               (begin, end, nbuffers);
+    return vector_iterator2stream<
+        stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                               DiffType, BlockSize, PagerType, PageSize>
+        >(begin, end, nbuffers);
 }
 
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_>
-struct streamify_traits<stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize>
+struct streamify_traits<
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                           DiffType, BlockSize, PagerType, PageSize>
+    >
 {
-    typedef vector_iterator2stream<stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> > stream_type;
+    typedef vector_iterator2stream<
+            stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                                   DiffType, BlockSize, PagerType, PageSize>
+            > stream_type;
 };
 
 //! Input external \c stxxl::vector const iterator range to stream converter.
@@ -246,64 +245,81 @@ struct streamify_traits<stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, B
 //! which equals to (2 * number_of_disks)
 //! \return an instance of a stream object
 
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_>
-vector_iterator2stream<stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize>
+vector_iterator2stream<
+    stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                 DiffType, BlockSize, PagerType, PageSize>
+    >
 streamify(
-    stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> begin,
-    stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> end,
+    stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                 DiffType, BlockSize, PagerType, PageSize> begin,
+    stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                 DiffType, BlockSize, PagerType, PageSize> end,
     unsigned_type nbuffers = 0)
 {
     STXXL_VERBOSE1("streamify for const_vector_iterator range is called");
-    return vector_iterator2stream<stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
-               (begin, end, nbuffers);
+    return vector_iterator2stream<
+        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                     DiffType, BlockSize, PagerType, PageSize>
+        >(begin, end, nbuffers);
 }
 
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_>
-struct streamify_traits<stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize>
+struct streamify_traits<
+    stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                 DiffType, BlockSize, PagerType, PageSize>
+    >
 {
-    typedef vector_iterator2stream<stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> > stream_type;
+    typedef vector_iterator2stream<
+            stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                         DiffType, BlockSize, PagerType, PageSize>
+            > stream_type;
 };
 
-
 //! Version of  \c iterator2stream. Switches between \c vector_iterator2stream and \c iterator2stream .
 //!
 //! small range switches between
 //! \c vector_iterator2stream and \c iterator2stream .
 //! iterator2stream is chosen if the input iterator range
 //! is small ( < B )
-template <class InputIterator_>
+template <class InputIterator>
 class vector_iterator2stream_sr
 {
-    vector_iterator2stream<InputIterator_>* vec_it_stream;
-    iterator2stream<InputIterator_>* it_stream;
+    vector_iterator2stream<InputIterator>* vec_it_stream;
+    iterator2stream<InputIterator>* it_stream;
 
-    typedef typename InputIterator_::block_type block_type;
+    typedef typename InputIterator::block_type block_type;
 
 public:
-    typedef vector_iterator2stream_sr<InputIterator_> Self_;
+    typedef vector_iterator2stream_sr<InputIterator> self_type;
 
     //! Standard stream typedef.
-    typedef typename std::iterator_traits<InputIterator_>::value_type value_type;
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
 
-    vector_iterator2stream_sr(InputIterator_ begin, InputIterator_ end, unsigned_type nbuffers = 0)
+    vector_iterator2stream_sr(InputIterator begin, InputIterator end,
+                              unsigned_type nbuffers = 0)
     {
         if (end - begin < block_type::size)
         {
-            STXXL_VERBOSE1("vector_iterator2stream_sr::vector_iterator2stream_sr: Choosing iterator2stream<InputIterator_>");
-            it_stream = new iterator2stream<InputIterator_>(begin, end);
+            STXXL_VERBOSE1("vector_iterator2stream_sr::vector_iterator2stream_sr: Choosing iterator2stream<InputIterator>");
+            it_stream = new iterator2stream<InputIterator>(begin, end);
             vec_it_stream = NULL;
         }
         else
         {
-            STXXL_VERBOSE1("vector_iterator2stream_sr::vector_iterator2stream_sr: Choosing vector_iterator2stream<InputIterator_>");
+            STXXL_VERBOSE1("vector_iterator2stream_sr::vector_iterator2stream_sr: Choosing vector_iterator2stream<InputIterator>");
             it_stream = NULL;
-            vec_it_stream = new vector_iterator2stream<InputIterator_>(begin, end, nbuffers);
+            vec_it_stream = new vector_iterator2stream<InputIterator>(begin, end, nbuffers);
         }
     }
 
-    vector_iterator2stream_sr(const Self_& a) : vec_it_stream(a.vec_it_stream), it_stream(a.it_stream) { }
+    vector_iterator2stream_sr(const self_type& a)
+        : vec_it_stream(a.vec_it_stream), it_stream(a.it_stream)
+    { }
 
     //! Standard stream method.
     const value_type& operator * () const
@@ -323,7 +339,7 @@ public:
     }
 
     //! Standard stream method.
-    Self_& operator ++ ()
+    self_type& operator ++ ()
     {
         if (it_stream)
             ++(*it_stream);
@@ -331,7 +347,6 @@ public:
         else
             ++(*vec_it_stream);
 
-
         return *this;
     }
 
@@ -353,35 +368,52 @@ public:
     }
 };
 
-//! Version of  \c streamify. Switches from \c vector_iterator2stream to \c iterator2stream for small ranges.
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_>
-vector_iterator2stream_sr<stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
+//! Version of \c streamify. Switches from \c vector_iterator2stream to \c
+//! iterator2stream for small ranges.
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize>
+vector_iterator2stream_sr<
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                           DiffType, BlockSize, PagerType, PageSize>
+    >
 streamify_sr(
-    stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> begin,
-    stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> end,
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                           DiffType, BlockSize, PagerType, PageSize> begin,
+    stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                           DiffType, BlockSize, PagerType, PageSize> end,
     unsigned_type nbuffers = 0)
 {
     STXXL_VERBOSE1("streamify_sr for vector_iterator range is called");
-    return vector_iterator2stream_sr<stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
-               (begin, end, nbuffers);
+    return vector_iterator2stream_sr<
+        stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                               DiffType, BlockSize, PagerType, PageSize>
+        >(begin, end, nbuffers);
 }
 
-//! Version of  \c streamify. Switches from \c vector_iterator2stream to \c iterator2stream for small ranges.
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_>
-vector_iterator2stream_sr<stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
+//! Version of \c streamify. Switches from \c vector_iterator2stream to \c
+//! iterator2stream for small ranges.
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize>
+vector_iterator2stream_sr<
+    stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                 DiffType, BlockSize, PagerType, PageSize>
+    >
 streamify_sr(
-    stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> begin,
-    stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> end,
+    stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                 DiffType, BlockSize, PagerType, PageSize> begin,
+    stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                 DiffType, BlockSize, PagerType, PageSize> end,
     unsigned_type nbuffers = 0)
 {
     STXXL_VERBOSE1("streamify_sr for const_vector_iterator range is called");
-    return vector_iterator2stream_sr<stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> >
-               (begin, end, nbuffers);
+    return vector_iterator2stream_sr<
+        stxxl::const_vector_iterator<ValueType, AllocStr, SizeType,
+                                     DiffType, BlockSize, PagerType, PageSize>
+        >(begin, end, nbuffers);
 }
 
-
 ////////////////////////////////////////////////////////////////////////
 //     MATERIALIZE                                                    //
 ////////////////////////////////////////////////////////////////////////
@@ -392,8 +424,8 @@ streamify_sr(
 //! \return value of the output iterator after all increments,
 //! i.e. points to the first unwritten value
 //! \pre Output (range) is large enough to hold the all elements in the input stream
-template <class OutputIterator_, class StreamAlgorithm_>
-OutputIterator_ materialize(StreamAlgorithm_& in, OutputIterator_ out)
+template <class OutputIterator, class StreamAlgorithm>
+OutputIterator materialize(StreamAlgorithm& in, OutputIterator out)
 {
     STXXL_VERBOSE_MATERIALIZE(STXXL_PRETTY_FUNCTION_NAME);
     while (!in.empty())
@@ -405,7 +437,6 @@ OutputIterator_ materialize(StreamAlgorithm_& in, OutputIterator_ out)
     return out;
 }
 
-
 //! Stores consecutively stream content to an output iterator range \b until end of the stream or end of the iterator range is reached.
 //! \param in stream to be stored used as source
 //! \param outbegin output iterator used as destination
@@ -415,8 +446,9 @@ OutputIterator_ materialize(StreamAlgorithm_& in, OutputIterator_ out)
 //! \pre Output range is large enough to hold the all elements in the input stream
 //!
 //! This function is useful when you do not know the length of the stream beforehand.
-template <class OutputIterator_, class StreamAlgorithm_>
-OutputIterator_ materialize(StreamAlgorithm_& in, OutputIterator_ outbegin, OutputIterator_ outend)
+template <class OutputIterator, class StreamAlgorithm>
+OutputIterator materialize(StreamAlgorithm& in,
+                           OutputIterator outbegin, OutputIterator outend)
 {
     STXXL_VERBOSE_MATERIALIZE(STXXL_PRETTY_FUNCTION_NAME);
     while ((!in.empty()) && outend != outbegin)
@@ -428,7 +460,6 @@ OutputIterator_ materialize(StreamAlgorithm_& in, OutputIterator_ outbegin, Outp
     return outbegin;
 }
 
-
 //! Stores consecutively stream content to an output \c stxxl::vector iterator \b until end of the stream or end of the iterator range is reached.
 //! \param in stream to be stored used as source
 //! \param outbegin output \c stxxl::vector iterator used as destination
@@ -440,20 +471,23 @@ OutputIterator_ materialize(StreamAlgorithm_& in, OutputIterator_ outbegin, Outp
 //! \pre Output range is large enough to hold the all elements in the input stream
 //!
 //! This function is useful when you do not know the length of the stream beforehand.
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_, class StreamAlgorithm_>
-stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_>
-materialize(StreamAlgorithm_& in,
-            stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> outbegin,
-            stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> outend,
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize, class StreamAlgorithm>
+stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                       DiffType, BlockSize, PagerType, PageSize>
+materialize(StreamAlgorithm& in,
+            stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                                   DiffType, BlockSize, PagerType, PageSize> outbegin,
+            stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                                   DiffType, BlockSize, PagerType, PageSize> outend,
             unsigned_type nbuffers = 0)
 {
     STXXL_VERBOSE_MATERIALIZE(STXXL_PRETTY_FUNCTION_NAME);
-    typedef stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> ExtIterator;
-    typedef stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> ConstExtIterator;
+    typedef stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> ExtIterator;
+    typedef stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> ConstExtIterator;
     typedef buf_ostream<typename ExtIterator::block_type, typename ExtIterator::bids_container_iterator> buf_ostream_type;
 
-
     while (outbegin.block_offset())     //  go to the beginning of the block
     //  of the external vector
     {
@@ -511,7 +545,6 @@ materialize(StreamAlgorithm_& in,
     return outbegin;
 }
 
-
 //! Stores consecutively stream content to an output \c stxxl::vector iterator.
 //! \param in stream to be stored used as source
 //! \param out output \c stxxl::vector iterator used as destination
@@ -520,16 +553,19 @@ materialize(StreamAlgorithm_& in,
 //! \return value of the output iterator after all increments,
 //! i.e. points to the first unwritten value
 //! \pre Output (range) is large enough to hold the all elements in the input stream
-template <typename Tp_, typename AllocStr_, typename SzTp_, typename DiffTp_,
-          unsigned BlkSize_, typename PgTp_, unsigned PgSz_, class StreamAlgorithm_>
-stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_>
-materialize(StreamAlgorithm_& in,
-            stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> out,
+template <typename ValueType, typename AllocStr, typename SizeType,
+          typename DiffType, unsigned BlockSize, typename PagerType,
+          unsigned PageSize, class StreamAlgorithm>
+stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                       DiffType, BlockSize, PagerType, PageSize>
+materialize(StreamAlgorithm& in,
+            stxxl::vector_iterator<ValueType, AllocStr, SizeType,
+                                   DiffType, BlockSize, PagerType, PageSize> out,
             unsigned_type nbuffers = 0)
 {
     STXXL_VERBOSE_MATERIALIZE(STXXL_PRETTY_FUNCTION_NAME);
-    typedef stxxl::vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> ExtIterator;
-    typedef stxxl::const_vector_iterator<Tp_, AllocStr_, SzTp_, DiffTp_, BlkSize_, PgTp_, PgSz_> ConstExtIterator;
+    typedef stxxl::vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> ExtIterator;
+    typedef stxxl::const_vector_iterator<ValueType, AllocStr, SizeType, DiffType, BlockSize, PagerType, PageSize> ConstExtIterator;
     typedef buf_ostream<typename ExtIterator::block_type, typename ExtIterator::bids_container_iterator> buf_ostream_type;
 
     // on the I/O complexity of "materialize":
@@ -551,7 +587,6 @@ materialize(StreamAlgorithm_& in,
     if (nbuffers == 0)
         nbuffers = 2 * config::get_instance()->disks_number();
 
-
     out.flush();     // flush container
 
     // create buffered write stream for blocks
@@ -597,13 +632,12 @@ materialize(StreamAlgorithm_& in,
     return out;
 }
 
-
 //! Reads stream content and discards it.
 //! Useful where you do not need the processed stream anymore,
 //! but are just interested in side effects, or just for debugging.
 //! \param in input stream
-template <class StreamAlgorithm_>
-void discard(StreamAlgorithm_& in)
+template <class StreamAlgorithm>
+void discard(StreamAlgorithm& in)
 {
     while (!in.empty())
     {
@@ -612,7 +646,6 @@ void discard(StreamAlgorithm_& in)
     }
 }
 
-
 ////////////////////////////////////////////////////////////////////////
 //     GENERATE                                                       //
 ////////////////////////////////////////////////////////////////////////
@@ -620,7 +653,7 @@ void discard(StreamAlgorithm_& in)
 //! A model of stream that outputs data from an adaptable generator functor.
 //! For convenience use \c streamify function instead of direct instantiation
 //! of \c generator2stream .
-template <class Generator_, typename T = typename Generator_::value_type>
+template <class Generator, typename T = typename Generator::value_type>
 class generator2stream
 {
 public:
@@ -628,30 +661,31 @@ public:
     typedef T value_type;
 
 private:
-    Generator_ gen_;
-    value_type current_;
+    Generator gen_;
+    value_type m_current;
 
 public:
-    generator2stream(Generator_ g) :
-        gen_(g), current_(gen_()) { }
+    generator2stream(Generator g)
+        : gen_(g), m_current(gen_())
+    { }
 
-    generator2stream(const generator2stream& a) : gen_(a.gen_), current_(a.current_) { }
+    generator2stream(const generator2stream& a) : gen_(a.gen_), m_current(a.m_current) { }
 
     //! Standard stream method.
     const value_type& operator * () const
     {
-        return current_;
+        return m_current;
     }
 
     const value_type* operator -> () const
     {
-        return &current_;
+        return &m_current;
     }
 
     //! Standard stream method.
     generator2stream& operator ++ ()
     {
-        current_ = gen_();
+        m_current = gen_();
         return *this;
     }
 
@@ -665,13 +699,12 @@ public:
 //! Adaptable generator to stream converter.
 //! \param gen_ generator object
 //! \return an instance of a stream object
-template <class Generator_>
-generator2stream<Generator_> streamify(Generator_ gen_)
+template <class Generator>
+generator2stream<Generator> streamify(Generator gen_)
 {
-    return generator2stream<Generator_>(gen_);
+    return generator2stream<Generator>(gen_);
 }
 
-
 ////////////////////////////////////////////////////////////////////////
 //     TRANSFORM                                                      //
 ////////////////////////////////////////////////////////////////////////
@@ -680,44 +713,44 @@ struct Stopper { };
 
 //! Processes (up to) 6 input streams using given operation functor.
 //!
-//! \tparam Operation_ type of the operation (type of an
+//! \tparam Operation type of the operation (type of an
 //! adaptable functor that takes 6 parameters)
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
-//! \tparam Input4_ type of the 4th input
-//! \tparam Input5_ type of the 5th input
-//! \tparam Input6_ type of the 6th input
-template <class Operation_,
-          class Input1_,
-          class Input2_ = Stopper,
-          class Input3_ = Stopper,
-          class Input4_ = Stopper,
-          class Input5_ = Stopper,
-          class Input6_ = Stopper
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
+//! \tparam Input4 type of the 4th input
+//! \tparam Input5 type of the 5th input
+//! \tparam Input6 type of the 6th input
+template <class Operation,
+          class Input1,
+          class Input2 = Stopper,
+          class Input3 = Stopper,
+          class Input4 = Stopper,
+          class Input5 = Stopper,
+          class Input6 = Stopper
           >
 class transform
 {
-    Operation_& op;
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
-    Input4_& i4;
-    Input5_& i5;
-    Input6_& i6;
+    Operation& op;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
+    Input4& i4;
+    Input5& i5;
+    Input6& i6;
 
 public:
     //! Standard stream typedef.
-    typedef typename Operation_::value_type value_type;
+    typedef typename Operation::value_type value_type;
 
 private:
     value_type current;
 
 public:
     //! Construction.
-    transform(Operation_& o, Input1_& i1_, Input2_& i2_, Input3_& i3_, Input4_& i4_,
-              Input5_& i5_, Input5_& i6_) :
-        op(o), i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_), i6(i6_)
+    transform(Operation& o, Input1& i1_, Input2& i2_, Input3& i3_, Input4& i4_,
+              Input5& i5_, Input5& i6_)
+        : op(o), i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_), i6(i6_)
     {
         if (!empty())
             current = op(*i1, *i2, *i3, *i4, *i5, *i6);
@@ -765,28 +798,28 @@ public:
 
 //! Processes an input stream using given operation functor.
 //!
-//! \tparam Operation_ type of the operation (type of an
+//! \tparam Operation type of the operation (type of an
 //! adaptable functor that takes 1 parameter)
-//! \tparam Input1_ type of the input
+//! \tparam Input1 type of the input
 //! \remark This is a specialization of \c transform .
-template <class Operation_,
-          class Input1_
+template <class Operation,
+          class Input1
           >
-class transform<Operation_, Input1_, Stopper, Stopper, Stopper, Stopper, Stopper>
+class transform<Operation, Input1, Stopper, Stopper, Stopper, Stopper, Stopper>
 {
-    Operation_& op;
-    Input1_& i1;
+    Operation& op;
+    Input1& i1;
 
 public:
     //! Standard stream typedef.
-    typedef typename Operation_::value_type value_type;
+    typedef typename Operation::value_type value_type;
 
 private:
     value_type current;
 
 public:
     //! Construction.
-    transform(Operation_& o, Input1_& i1_) : op(o), i1(i1_)
+    transform(Operation& o, Input1& i1_) : op(o), i1(i1_)
     {
         if (!empty())
             current = op(*i1);
@@ -820,38 +853,37 @@ public:
     }
 };
 
-
 ////////////////////////////////////////////////////////////////////////
 //     TRANSFORM (2 input streams)                                    //
 ////////////////////////////////////////////////////////////////////////
 
 //! Processes 2 input streams using given operation functor.
 //!
-//! \tparam Operation_ type of the operation (type of an
+//! \tparam Operation type of the operation (type of an
 //! adaptable functor that takes 2 parameters)
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
 //! \remark This is a specialization of \c transform .
-template <class Operation_,
-          class Input1_,
-          class Input2_
+template <class Operation,
+          class Input1,
+          class Input2
           >
-class transform<Operation_, Input1_, Input2_, Stopper, Stopper, Stopper, Stopper>
+class transform<Operation, Input1, Input2, Stopper, Stopper, Stopper, Stopper>
 {
-    Operation_& op;
-    Input1_& i1;
-    Input2_& i2;
+    Operation& op;
+    Input1& i1;
+    Input2& i2;
 
 public:
     //! Standard stream typedef.
-    typedef typename Operation_::value_type value_type;
+    typedef typename Operation::value_type value_type;
 
 private:
     value_type current;
 
 public:
     //! Construction.
-    transform(Operation_& o, Input1_& i1_, Input2_& i2_) : op(o), i1(i1_), i2(i2_)
+    transform(Operation& o, Input1& i1_, Input2& i2_) : op(o), i1(i1_), i2(i2_)
     {
         if (!empty())
             current = op(*i1, *i2);
@@ -886,42 +918,41 @@ public:
     }
 };
 
-
 ////////////////////////////////////////////////////////////////////////
 //     TRANSFORM (3 input streams)                                    //
 ////////////////////////////////////////////////////////////////////////
 
 //! Processes 3 input streams using given operation functor.
 //!
-//! \tparam Operation_ type of the operation (type of an
+//! \tparam Operation type of the operation (type of an
 //! adaptable functor that takes 3 parameters)
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
 //! \remark This is a specialization of \c transform .
-template <class Operation_,
-          class Input1_,
-          class Input2_,
-          class Input3_
+template <class Operation,
+          class Input1,
+          class Input2,
+          class Input3
           >
-class transform<Operation_, Input1_, Input2_, Input3_, Stopper, Stopper, Stopper>
+class transform<Operation, Input1, Input2, Input3, Stopper, Stopper, Stopper>
 {
-    Operation_& op;
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
+    Operation& op;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
 
 public:
     //! Standard stream typedef.
-    typedef typename Operation_::value_type value_type;
+    typedef typename Operation::value_type value_type;
 
 private:
     value_type current;
 
 public:
     //! Construction.
-    transform(Operation_& o, Input1_& i1_, Input2_& i2_, Input3_& i3_) :
-        op(o), i1(i1_), i2(i2_), i3(i3_)
+    transform(Operation& o, Input1& i1_, Input2& i2_, Input3& i3_)
+        : op(o), i1(i1_), i2(i2_), i3(i3_)
     {
         if (!empty())
             current = op(*i1, *i2, *i3);
@@ -957,45 +988,44 @@ public:
     }
 };
 
-
 ////////////////////////////////////////////////////////////////////////
 //     TRANSFORM (4 input streams)                                    //
 ////////////////////////////////////////////////////////////////////////
 
 //! Processes 4 input streams using given operation functor.
 //!
-//! \tparam Operation_ type of the operation (type of an
+//! \tparam Operation type of the operation (type of an
 //! adaptable functor that takes 4 parameters)
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
-//! \tparam Input4_ type of the 4th input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
+//! \tparam Input4 type of the 4th input
 //! \remark This is a specialization of \c transform .
-template <class Operation_,
-          class Input1_,
-          class Input2_,
-          class Input3_,
-          class Input4_
+template <class Operation,
+          class Input1,
+          class Input2,
+          class Input3,
+          class Input4
           >
-class transform<Operation_, Input1_, Input2_, Input3_, Input4_, Stopper, Stopper>
+class transform<Operation, Input1, Input2, Input3, Input4, Stopper, Stopper>
 {
-    Operation_& op;
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
-    Input4_& i4;
+    Operation& op;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
+    Input4& i4;
 
 public:
     //! Standard stream typedef.
-    typedef typename Operation_::value_type value_type;
+    typedef typename Operation::value_type value_type;
 
 private:
     value_type current;
 
 public:
     //! Construction.
-    transform(Operation_& o, Input1_& i1_, Input2_& i2_, Input3_& i3_, Input4_& i4_) :
-        op(o), i1(i1_), i2(i2_), i3(i3_), i4(i4_)
+    transform(Operation& o, Input1& i1_, Input2& i2_, Input3& i3_, Input4& i4_)
+        : op(o), i1(i1_), i2(i2_), i3(i3_), i4(i4_)
     {
         if (!empty())
             current = op(*i1, *i2, *i3, *i4);
@@ -1032,49 +1062,48 @@ public:
     }
 };
 
-
 ////////////////////////////////////////////////////////////////////////
 //     TRANSFORM (5 input streams)                                    //
 ////////////////////////////////////////////////////////////////////////
 
 //! Processes 5 input streams using given operation functor.
 //!
-//! \tparam Operation_ type of the operation (type of an
+//! \tparam Operation type of the operation (type of an
 //! adaptable functor that takes 5 parameters)
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
-//! \tparam Input4_ type of the 4th input
-//! \tparam Input5_ type of the 5th input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
+//! \tparam Input4 type of the 4th input
+//! \tparam Input5 type of the 5th input
 //! \remark This is a specialization of \c transform .
-template <class Operation_,
-          class Input1_,
-          class Input2_,
-          class Input3_,
-          class Input4_,
-          class Input5_
+template <class Operation,
+          class Input1,
+          class Input2,
+          class Input3,
+          class Input4,
+          class Input5
           >
-class transform<Operation_, Input1_, Input2_, Input3_, Input4_, Input5_, Stopper>
+class transform<Operation, Input1, Input2, Input3, Input4, Input5, Stopper>
 {
-    Operation_& op;
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
-    Input4_& i4;
-    Input5_& i5;
+    Operation& op;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
+    Input4& i4;
+    Input5& i5;
 
 public:
     //! Standard stream typedef.
-    typedef typename Operation_::value_type value_type;
+    typedef typename Operation::value_type value_type;
 
 private:
     value_type current;
 
 public:
     //! Construction.
-    transform(Operation_& o, Input1_& i1_, Input2_& i2_, Input3_& i3_, Input4_& i4_,
-              Input5_& i5_) :
-        op(o), i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_)
+    transform(Operation& o, Input1& i1_, Input2& i2_, Input3& i3_,
+              Input4& i4_, Input5& i5_)
+        : op(o), i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_)
     {
         if (!empty())
             current = op(*i1, *i2, *i3, *i4, *i5);
@@ -1112,44 +1141,43 @@ public:
     }
 };
 
-
 ////////////////////////////////////////////////////////////////////////
 //     MAKE TUPLE                                                     //
 ////////////////////////////////////////////////////////////////////////
 
 //! Creates stream of 6-tuples from 6 input streams.
 //!
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
-//! \tparam Input4_ type of the 4th input
-//! \tparam Input5_ type of the 5th input
-//! \tparam Input6_ type of the 6th input
-template <class Input1_,
-          class Input2_,
-          class Input3_ = Stopper,
-          class Input4_ = Stopper,
-          class Input5_ = Stopper,
-          class Input6_ = Stopper
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
+//! \tparam Input4 type of the 4th input
+//! \tparam Input5 type of the 5th input
+//! \tparam Input6 type of the 6th input
+template <class Input1,
+          class Input2,
+          class Input3 = Stopper,
+          class Input4 = Stopper,
+          class Input5 = Stopper,
+          class Input6 = Stopper
           >
 class make_tuple
 {
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
-    Input4_& i4;
-    Input5_& i5;
-    Input6_& i6;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
+    Input4& i4;
+    Input5& i5;
+    Input6& i6;
 
 public:
     //! Standard stream typedef.
     typedef typename stxxl::tuple<
-            typename Input1_::value_type,
-            typename Input2_::value_type,
-            typename Input3_::value_type,
-            typename Input4_::value_type,
-            typename Input5_::value_type,
-            typename Input6_::value_type
+            typename Input1::value_type,
+            typename Input2::value_type,
+            typename Input3::value_type,
+            typename Input4::value_type,
+            typename Input5::value_type,
+            typename Input6::value_type
             > value_type;
 
 private:
@@ -1157,15 +1185,13 @@ private:
 
 public:
     //! Construction.
-    make_tuple(
-        Input1_& i1_,
-        Input2_& i2_,
-        Input3_& i3_,
-        Input4_& i4_,
-        Input5_& i5_,
-        Input6_& i6_
-        ) :
-        i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_), i6(i6_)
+    make_tuple(Input1& i1_,
+               Input2& i2_,
+               Input3& i3_,
+               Input4& i4_,
+               Input5& i5_,
+               Input6& i6_)
+        : i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_), i6(i6_)
     {
         if (!empty())
             current = value_type(*i1, *i2, *i3, *i4, *i5, *i6);
@@ -1206,25 +1232,24 @@ public:
     }
 };
 
-
 //! Creates stream of 2-tuples (pairs) from 2 input streams.
 //!
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
 //! \remark A specialization of \c make_tuple .
-template <class Input1_,
-          class Input2_
+template <class Input1,
+          class Input2
           >
-class make_tuple<Input1_, Input2_, Stopper, Stopper, Stopper, Stopper>
+class make_tuple<Input1, Input2, Stopper, Stopper, Stopper, Stopper>
 {
-    Input1_& i1;
-    Input2_& i2;
+    Input1& i1;
+    Input2& i2;
 
 public:
     //! Standard stream typedef.
     typedef typename stxxl::tuple<
-            typename Input1_::value_type,
-            typename Input2_::value_type
+            typename Input1::value_type,
+            typename Input2::value_type
             > value_type;
 
 private:
@@ -1232,11 +1257,9 @@ private:
 
 public:
     //! Construction.
-    make_tuple(
-        Input1_& i1_,
-        Input2_& i2_
-        ) :
-        i1(i1_), i2(i2_)
+    make_tuple(Input1& i1_,
+               Input2& i2_)
+        : i1(i1_), i2(i2_)
     {
         if (!empty())
             current = value_type(*i1, *i2);
@@ -1274,26 +1297,26 @@ public:
 
 //! Creates stream of 3-tuples from 3 input streams.
 //!
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
 //! \remark A specialization of \c make_tuple .
-template <class Input1_,
-          class Input2_,
-          class Input3_
+template <class Input1,
+          class Input2,
+          class Input3
           >
-class make_tuple<Input1_, Input2_, Input3_, Stopper, Stopper, Stopper>
+class make_tuple<Input1, Input2, Input3, Stopper, Stopper, Stopper>
 {
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
 
 public:
     //! Standard stream typedef.
     typedef typename stxxl::tuple<
-            typename Input1_::value_type,
-            typename Input2_::value_type,
-            typename Input3_::value_type
+            typename Input1::value_type,
+            typename Input2::value_type,
+            typename Input3::value_type
             > value_type;
 
 private:
@@ -1301,12 +1324,10 @@ private:
 
 public:
     //! Construction.
-    make_tuple(
-        Input1_& i1_,
-        Input2_& i2_,
-        Input3_& i3_
-        ) :
-        i1(i1_), i2(i2_), i3(i3_)
+    make_tuple(Input1& i1_,
+               Input2& i2_,
+               Input3& i3_)
+        : i1(i1_), i2(i2_), i3(i3_)
     {
         if (!empty())
             current = value_type(*i1, *i2, *i3);
@@ -1345,30 +1366,30 @@ public:
 
 //! Creates stream of 4-tuples from 4 input streams.
 //!
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
-//! \tparam Input4_ type of the 4th input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
+//! \tparam Input4 type of the 4th input
 //! \remark A specialization of \c make_tuple .
-template <class Input1_,
-          class Input2_,
-          class Input3_,
-          class Input4_
+template <class Input1,
+          class Input2,
+          class Input3,
+          class Input4
           >
-class make_tuple<Input1_, Input2_, Input3_, Input4_, Stopper, Stopper>
+class make_tuple<Input1, Input2, Input3, Input4, Stopper, Stopper>
 {
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
-    Input4_& i4;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
+    Input4& i4;
 
 public:
     //! Standard stream typedef.
     typedef typename stxxl::tuple<
-            typename Input1_::value_type,
-            typename Input2_::value_type,
-            typename Input3_::value_type,
-            typename Input4_::value_type
+            typename Input1::value_type,
+            typename Input2::value_type,
+            typename Input3::value_type,
+            typename Input4::value_type
             > value_type;
 
 private:
@@ -1376,13 +1397,11 @@ private:
 
 public:
     //! Construction.
-    make_tuple(
-        Input1_& i1_,
-        Input2_& i2_,
-        Input3_& i3_,
-        Input4_& i4_
-        ) :
-        i1(i1_), i2(i2_), i3(i3_), i4(i4_)
+    make_tuple(Input1& i1_,
+               Input2& i2_,
+               Input3& i3_,
+               Input4& i4_)
+        : i1(i1_), i2(i2_), i3(i3_), i4(i4_)
     {
         if (!empty())
             current = value_type(*i1, *i2, *i3, *i4);
@@ -1423,35 +1442,35 @@ public:
 
 //! Creates stream of 5-tuples from 5 input streams.
 //!
-//! \tparam Input1_ type of the 1st input
-//! \tparam Input2_ type of the 2nd input
-//! \tparam Input3_ type of the 3rd input
-//! \tparam Input4_ type of the 4th input
-//! \tparam Input5_ type of the 5th input
+//! \tparam Input1 type of the 1st input
+//! \tparam Input2 type of the 2nd input
+//! \tparam Input3 type of the 3rd input
+//! \tparam Input4 type of the 4th input
+//! \tparam Input5 type of the 5th input
 //! \remark A specialization of \c make_tuple .
 template <
-    class Input1_,
-    class Input2_,
-    class Input3_,
-    class Input4_,
-    class Input5_
+    class Input1,
+    class Input2,
+    class Input3,
+    class Input4,
+    class Input5
     >
-class make_tuple<Input1_, Input2_, Input3_, Input4_, Input5_, Stopper>
+class make_tuple<Input1, Input2, Input3, Input4, Input5, Stopper>
 {
-    Input1_& i1;
-    Input2_& i2;
-    Input3_& i3;
-    Input4_& i4;
-    Input5_& i5;
+    Input1& i1;
+    Input2& i2;
+    Input3& i3;
+    Input4& i4;
+    Input5& i5;
 
 public:
     //! Standard stream typedef.
     typedef typename stxxl::tuple<
-            typename Input1_::value_type,
-            typename Input2_::value_type,
-            typename Input3_::value_type,
-            typename Input4_::value_type,
-            typename Input5_::value_type
+            typename Input1::value_type,
+            typename Input2::value_type,
+            typename Input3::value_type,
+            typename Input4::value_type,
+            typename Input5::value_type
             > value_type;
 
 private:
@@ -1459,14 +1478,12 @@ private:
 
 public:
     //! Construction.
-    make_tuple(
-        Input1_& i1_,
-        Input2_& i2_,
-        Input3_& i3_,
-        Input4_& i4_,
-        Input5_& i5_
-        ) :
-        i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_)
+    make_tuple(Input1& i1_,
+               Input2& i2_,
+               Input3& i3_,
+               Input4& i4_,
+               Input5& i5_)
+        : i1(i1_), i2(i2_), i3(i3_), i4(i4_), i5(i5_)
     {
         if (!empty())
             current = value_type(*i1, *i2, *i3, *i4, *i5);
@@ -1506,17 +1523,14 @@ public:
     }
 };
 
-
 //! \}
 
 } // namespace stream
 
 STXXL_END_NAMESPACE
 
-
 #include <stxxl/bits/stream/choose.h>
 #include <stxxl/bits/stream/unique.h>
 
-
 #endif // !STXXL_STREAM_STREAM_HEADER
 // vim: et:ts=4:sw=4
diff --git a/include/stxxl/bits/stream/unique.h b/include/stxxl/bits/stream/unique.h
index 6a793c7..c01c1a4 100644
--- a/include/stxxl/bits/stream/unique.h
+++ b/include/stxxl/bits/stream/unique.h
@@ -16,7 +16,6 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 //! Stream package subnamespace.
@@ -26,13 +25,13 @@ namespace stream {
 //     UNIQUE                                                         //
 ////////////////////////////////////////////////////////////////////////
 
-struct dummy_cmp_unique_ { };
+struct dummy_cmp_unique { };
 
 //! Equivalent to std::unique algorithms.
 //!
 //! Removes consecutive duplicates from the stream.
 //! Uses BinaryPredicate to compare elements of the stream
-template <class Input, class BinaryPredicate = dummy_cmp_unique_>
+template <class Input, class BinaryPredicate = dummy_cmp_unique>
 class unique
 {
     Input& input;
@@ -43,7 +42,8 @@ public:
     //! Standard stream typedef.
     typedef typename Input::value_type value_type;
 
-    unique(Input& input_, BinaryPredicate binary_pred_) : input(input_), binary_pred(binary_pred_)
+    unique(Input& input_, BinaryPredicate binary_pred_)
+        : input(input_), binary_pred(binary_pred_)
     {
         if (!input.empty())
             current = *input;
@@ -82,7 +82,7 @@ public:
 //!
 //! Removes consecutive duplicates from the stream.
 template <class Input>
-class unique<Input, dummy_cmp_unique_>
+class unique<Input, dummy_cmp_unique>
 {
     Input& input;
     typename Input::value_type current;
diff --git a/include/stxxl/bits/unused.h b/include/stxxl/bits/unused.h
index feb2bcb..48cb297 100644
--- a/include/stxxl/bits/unused.h
+++ b/include/stxxl/bits/unused.h
@@ -16,7 +16,6 @@
 
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 template <typename U>
diff --git a/include/stxxl/bits/utils/malloc.h b/include/stxxl/bits/utils/malloc.h
index aff87be..20eb001 100644
--- a/include/stxxl/bits/utils/malloc.h
+++ b/include/stxxl/bits/utils/malloc.h
@@ -25,10 +25,8 @@
 #include <stxxl/bits/namespace.h>
 #include <stxxl/bits/unused.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-
 //! Access to some useful malloc statistics.
 
 //! malloc is default C++ allocator
diff --git a/include/stxxl/bits/verbose.h b/include/stxxl/bits/verbose.h
index 730757b..d8817e5 100644
--- a/include/stxxl/bits/verbose.h
+++ b/include/stxxl/bits/verbose.h
@@ -21,7 +21,6 @@
 #include <string>
 #include <stxxl/bits/unused.h>
 
-
 #define _STXXL_PRNT_COUT        (1 << 0)
 #define _STXXL_PRNT_CERR        (1 << 1)
 #define _STXXL_PRNT_LOG         (1 << 2)
@@ -34,14 +33,12 @@
 #define _STXXL_PRINT_FLAGS_ERROR    (_STXXL_PRNT_CERR | _STXXL_PRNT_ERRLOG)
 #define _STXXL_PRINT_FLAGS_VERBOSE  (_STXXL_PRINT_FLAGS_DEFAULT | _STXXL_PRNT_TIMESTAMP | _STXXL_PRNT_THREAD_ID)
 
-
 STXXL_BEGIN_NAMESPACE
 
 void print_msg(const char* label, const std::string& msg, unsigned flags);
 
 STXXL_END_NAMESPACE
 
-
 #define _STXXL_PRINT(label, message, flags)                                  \
     do {                                                                     \
         std::ostringstream str_;                                             \
@@ -49,8 +46,13 @@ STXXL_END_NAMESPACE
         stxxl::print_msg(label, str_.str(), flags | _STXXL_PRNT_ADDNEWLINE); \
     } while (false)
 
-#define _STXXL_NOT_VERBOSE do { } while (false)
-
+#define _STXXL_NOT_VERBOSE(message)  \
+    do {                             \
+        if (0) {                     \
+            std::ostringstream str_; \
+            str_ << message;         \
+        }                            \
+    } while (false)
 
 #ifdef STXXL_FORCE_VERBOSE_LEVEL
 #undef STXXL_VERBOSE_LEVEL
@@ -67,22 +69,20 @@ STXXL_END_NAMESPACE
 #define STXXL_VERBOSE_LEVEL -1
 #endif
 
-
 #if STXXL_VERBOSE_LEVEL > -10
  #define STXXL_MSG(x) _STXXL_PRINT("STXXL-MSG", x, _STXXL_PRINT_FLAGS_DEFAULT)
 #else
 // Please do not report STXXL problems with STXXL_MSG disabled!
- #define STXXL_MSG(x) _STXXL_NOT_VERBOSE
+ #define STXXL_MSG(x) _STXXL_NOT_VERBOSE(x)
 #endif
 
 #if STXXL_VERBOSE_LEVEL > -100
  #define STXXL_ERRMSG(x) _STXXL_PRINT("STXXL-ERRMSG", x, _STXXL_PRINT_FLAGS_ERROR)
 #else
 // Please do not report STXXL problems with STXXL_ERRMSG disabled!
- #define STXXL_ERRMSG(x) _STXXL_NOT_VERBOSE
+ #define STXXL_ERRMSG(x) _STXXL_NOT_VERBOSE(x)
 #endif
 
-
 // STXXL_VERBOSE0 should be used for current debugging activity only,
 // and afterwards be replaced by STXXL_VERBOSE1 or higher.
 // Code that actively uses STXXL_VERBOSE0 should never get into a release.
@@ -90,13 +90,13 @@ STXXL_END_NAMESPACE
 #if STXXL_VERBOSE_LEVEL > -1
  #define STXXL_VERBOSE0(x) _STXXL_PRINT("STXXL-VERBOSE0", x, _STXXL_PRINT_FLAGS_VERBOSE)
 #else
- #define STXXL_VERBOSE0(x) _STXXL_NOT_VERBOSE
+ #define STXXL_VERBOSE0(x) _STXXL_NOT_VERBOSE(x)
 #endif
 
 #if STXXL_VERBOSE_LEVEL > 0
  #define STXXL_VERBOSE1(x) _STXXL_PRINT("STXXL-VERBOSE1", x, _STXXL_PRINT_FLAGS_VERBOSE)
 #else
- #define STXXL_VERBOSE1(x) _STXXL_NOT_VERBOSE
+ #define STXXL_VERBOSE1(x) _STXXL_NOT_VERBOSE(x)
 #endif
 
 #define STXXL_VERBOSE(x) STXXL_VERBOSE1(x)
@@ -104,15 +104,29 @@ STXXL_END_NAMESPACE
 #if STXXL_VERBOSE_LEVEL > 1
  #define STXXL_VERBOSE2(x) _STXXL_PRINT("STXXL-VERBOSE2", x, _STXXL_PRINT_FLAGS_VERBOSE)
 #else
- #define STXXL_VERBOSE2(x) _STXXL_NOT_VERBOSE
+ #define STXXL_VERBOSE2(x) _STXXL_NOT_VERBOSE(x)
 #endif
 
 #if STXXL_VERBOSE_LEVEL > 2
  #define STXXL_VERBOSE3(x) _STXXL_PRINT("STXXL-VERBOSE3", x, _STXXL_PRINT_FLAGS_VERBOSE)
 #else
- #define STXXL_VERBOSE3(x) _STXXL_NOT_VERBOSE
+ #define STXXL_VERBOSE3(x) _STXXL_NOT_VERBOSE(x)
 #endif
 
+// STXXL_VERBOSE[0123]_THIS prefixes "[0xaddress]" and then calls the version
+// without _THIS.
+
+#define STXXL_VERBOSE0_THIS(x) \
+    STXXL_VERBOSE0("[" << static_cast<void*>(this) << "] " << x)
+
+#define STXXL_VERBOSE1_THIS(x) \
+    STXXL_VERBOSE1("[" << static_cast<void*>(this) << "] " << x)
+
+#define STXXL_VERBOSE2_THIS(x) \
+    STXXL_VERBOSE2("[" << static_cast<void*>(this) << "] " << x)
+
+#define STXXL_VERBOSE3_THIS(x) \
+    STXXL_VERBOSE3("[" << static_cast<void*>(this) << "] " << x)
 
 // STXXL_CHECK is an assertion macro for unit tests, which contrarily to
 // assert() also works in release builds. These macros should ONLY be used in
diff --git a/lib/common/rand.cpp b/include/stxxl/unordered_map
similarity index 52%
copy from lib/common/rand.cpp
copy to include/stxxl/unordered_map
index efffe22..6f7defa 100644
--- a/lib/common/rand.cpp
+++ b/include/stxxl/unordered_map
@@ -1,23 +1,14 @@
+// -*- mode: c++ -*-
 /***************************************************************************
- *  lib/common/rand.cpp
+ *  include/stxxl/unordered_map
  *
  *  Part of the STXXL. See http://stxxl.sourceforge.net
  *
- *  Copyright (C) 2002 Roman Dementiev <dementiev at mpi-sb.mpg.de>
- *  Copyright (C) 2007 Andreas Beckmann <beckmann at mpi-inf.mpg.de>
+ *  Copyright (C) 2008 Markus Westphal <marwes at users.sourceforge.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
  *  http://www.boost.org/LICENSE_1_0.txt)
  **************************************************************************/
 
-#include <stxxl/bits/common/rand.h>
-#include <stxxl/bits/common/seed.h>
-#include <stxxl/bits/namespace.h>
-
-
-STXXL_BEGIN_NAMESPACE
-
-unsigned ran32State = get_next_seed();
-
-STXXL_END_NAMESPACE
+#include <stxxl/bits/containers/unordered_map.h>
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index eceeb1d..e971a03 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -52,17 +52,22 @@ set(LIBSTXXL_SOURCES
   )
 
 if(NOT MSVC)
-
   # additional sources for non Visual Studio builds
   set(LIBSTXXL_SOURCES ${LIBSTXXL_SOURCES}
-
     io/mmap_file.cpp
     io/simdisk_file.cpp
-
     )
-
 endif(NOT MSVC)
 
+if(STXXL_HAVE_LINUXAIO_FILE)
+  # additional sources for LinuxAIO fileio access method
+  set(LIBSTXXL_SOURCES ${LIBSTXXL_SOURCES}
+    io/linuxaio_file.cpp
+    io/linuxaio_queue.cpp
+    io/linuxaio_request.cpp
+    )
+endif()
+
 # tell top-level cmakelists which library we build
 set(STXXL_EXPORTED_LIBS stxxl)
 
diff --git a/lib/algo/async_schedule.cpp b/lib/algo/async_schedule.cpp
index dd52319..b372fa4 100644
--- a/lib/algo/async_schedule.cpp
+++ b/lib/algo/async_schedule.cpp
@@ -16,8 +16,8 @@
 // and queued writing on parallel disks, 2005
 // DOI: 10.1137/S0097539703431573
 
-
 #include <stxxl/bits/algo/async_schedule.h>
+#include <stxxl/bits/common/simple_vector.h>
 #include <stxxl/bits/common/types.h>
 #include <stxxl/bits/io/file.h>
 #include <stxxl/bits/namespace.h>
@@ -32,7 +32,6 @@
 #include <utility>
 #include <vector>
 
-
 STXXL_BEGIN_NAMESPACE
 
 namespace async_schedule_local {
@@ -62,11 +61,10 @@ struct write_time_cmp : public std::binary_function<write_time_pair, write_time_
     }
 };
 
-
 static inline int_type get_disk(int_type i, const int_type* disks, int_type D)
 {
     int_type disk = disks[i];
-    if (disk == file::NO_ALLOCATOR)
+    if (disk == (int_type)file::DEFAULT_DEVICE_ID)
         disk = D;      // remap to sentinel
     assert(0 <= disk && disk <= D);
     return disk;
@@ -82,13 +80,13 @@ int_type simulate_async_write(
     typedef std::priority_queue<sim_event, std::vector<sim_event>, sim_event_cmp> event_queue_type;
     typedef std::queue<int_type> disk_queue_type;
     assert(L >= D);
-    disk_queue_type* disk_queues = new disk_queue_type[D + 1];       // + sentinel for remapping NO_ALLOCATOR
+    simple_vector<disk_queue_type> disk_queues(D + 1); // + sentinel for remapping NO_ALLOCATOR
     event_queue_type event_queue;
 
     int_type m = m_init;
     int_type i = L - 1;
     int_type oldtime = 0;
-    bool* disk_busy = new bool[D + 1];
+    simple_vector<bool> disk_busy(D + 1);
 
     while (m && (i >= 0))
     {
@@ -120,7 +118,6 @@ int_type simulate_async_write(
             oldtime = cur.timestamp;
         }
 
-
         STXXL_VERBOSE1("Block " << cur.iblock << " put out, time " << cur.timestamp << " disk: " << disks[cur.iblock]);
         o_time[cur.iblock] = std::pair<int_type, int_type>(cur.iblock, cur.timestamp);
 
@@ -163,16 +160,11 @@ int_type simulate_async_write(
     for (int_type i = 0; i <= D; i++)
         assert(disk_queues[i].empty());
 
-
-    delete[] disk_busy;
-    delete[] disk_queues;
-
     return (oldtime - 1);
 }
 
 } // namespace async_schedule_local
 
-
 void compute_prefetch_schedule(
     const int_type* first,
     const int_type* last,
diff --git a/lib/common/cmdline.cpp b/lib/common/cmdline.cpp
index c785926..7c1999d 100644
--- a/lib/common/cmdline.cpp
+++ b/lib/common/cmdline.cpp
@@ -102,7 +102,8 @@ void cmdline_parser::print_usage(std::ostream& os)
         {
             const argument* arg = *it;
 
-            os << "  " << std::setw(m_param_maxlong) << std::left << arg->param_text();
+            os << "  " << std::setw(m_param_maxlong) << std::left
+               << arg->param_text();
             output_wrap(os, arg->m_desc, m_linewrap,
                         0, m_param_maxlong + 2, m_param_maxlong + 2, 8);
         }
@@ -117,7 +118,8 @@ void cmdline_parser::print_usage(std::ostream& os)
         {
             const argument* arg = *it;
 
-            os << "  " << std::setw(m_opt_maxlong) << std::left << arg->option_text();
+            os << "  " << std::setw(m_opt_maxlong) << std::left
+               << arg->option_text();
             output_wrap(os, arg->m_desc, m_linewrap,
                         0, m_opt_maxlong + 2, m_opt_maxlong + 2, 8);
         }
@@ -126,8 +128,9 @@ void cmdline_parser::print_usage(std::ostream& os)
     os.copyfmt(state);
 }
 
-void cmdline_parser::print_option_error(int argc, const char* const* argv, const argument* arg,
-                                        std::ostream& os)
+void cmdline_parser::print_option_error(
+    int argc, const char* const* argv, const argument* arg,
+    std::ostream& os)
 {
     os << "Error: Argument ";
     if (argc != 0)
@@ -305,7 +308,7 @@ void cmdline_parser::print_result(std::ostream& os)
     std::ios state(NULL);
     state.copyfmt(os);
 
-    size_t maxlong = STXXL_MAX(m_param_maxlong, m_opt_maxlong);
+    int maxlong = STXXL_MAX(m_param_maxlong, m_opt_maxlong);
 
     if (m_paramlist.size())
     {
diff --git a/lib/common/exithandler.cpp b/lib/common/exithandler.cpp
index b257205..2592b03 100644
--- a/lib/common/exithandler.cpp
+++ b/lib/common/exithandler.cpp
@@ -17,7 +17,6 @@
 // 2. #define STXXL_NON_DEFAULT_EXIT_HANDLER for a handler that does not use atexit()
 // 3. #define STXXL_EXTERNAL_EXIT_HANDLER to provide your own implementation
 
-
 #ifndef STXXL_EXTERNAL_EXIT_HANDLER
 #ifndef STXXL_NON_DEFAULT_EXIT_HANDLER
 
diff --git a/lib/common/rand.cpp b/lib/common/rand.cpp
index efffe22..ad7bbc3 100644
--- a/lib/common/rand.cpp
+++ b/lib/common/rand.cpp
@@ -15,7 +15,6 @@
 #include <stxxl/bits/common/seed.h>
 #include <stxxl/bits/namespace.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 unsigned ran32State = get_next_seed();
diff --git a/lib/common/seed.cpp b/lib/common/seed.cpp
index d41379f..fa91567 100644
--- a/lib/common/seed.cpp
+++ b/lib/common/seed.cpp
@@ -27,7 +27,6 @@
   #include <sys/time.h>
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 inline unsigned initial_seed();
@@ -61,7 +60,7 @@ inline unsigned initial_seed()
     struct timeval tv;
     gettimeofday(&tv, 0);
 
-    return tv.tv_sec ^ tv.tv_usec ^ (getpid() << 16);
+    return (unsigned)(tv.tv_sec ^ tv.tv_usec ^ (getpid() << 16));
 #endif
 }
 
diff --git a/lib/common/verbose.cpp b/lib/common/verbose.cpp
index b43de8f..ccc1649 100644
--- a/lib/common/verbose.cpp
+++ b/lib/common/verbose.cpp
@@ -18,7 +18,6 @@
 #include <stxxl/bits/common/timer.h>
 #include <stxxl/bits/msvc_compatibility.h>
 
-
 #ifndef STXXL_THREAD_ID
 # if STXXL_STD_THREADS || STXXL_BOOST_THREADS
 #  define STXXL_THREAD_ID (-1)
@@ -27,7 +26,6 @@
 # endif
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 static const double program_start_time_stamp = timestamp();
diff --git a/lib/common/version.cpp b/lib/common/version.cpp
index 23f3369..15e4dec 100644
--- a/lib/common/version.cpp
+++ b/lib/common/version.cpp
@@ -15,7 +15,6 @@
 #include <stxxl/bits/namespace.h>
 #include <stxxl/bits/version.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
 int version_major()
diff --git a/lib/io/boostfd_file.cpp b/lib/io/boostfd_file.cpp
index bd68c53..d4dd184 100644
--- a/lib/io/boostfd_file.cpp
+++ b/lib/io/boostfd_file.cpp
@@ -6,6 +6,7 @@
  *  Copyright (C) 2006 Roman Dementiev <dementiev at ira.uka.de>
  *  Copyright (C) 2009, 2010 Johannes Singler <singler at kit.edu>
  *  Copyright (C) 2008, 2010 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -23,22 +24,16 @@
 #include <boost/filesystem/fstream.hpp>
 #include <boost/version.hpp>
 
-
 STXXL_BEGIN_NAMESPACE
 
-
-void boostfd_file::serve(const request* req) throw (io_error)
+void boostfd_file::serve(void* buffer, offset_type offset, size_type bytes,
+                         request::request_type type)
 {
-    scoped_mutex_lock fd_lock(fd_mutex);
-    assert(req->get_file() == this);
-    offset_type offset = req->get_offset();
-    void* buffer = req->get_buffer();
-    size_type bytes = req->get_size();
-    request::request_type type = req->get_type();
+    scoped_mutex_lock fd_lock(m_fd_mutex);
 
     try
     {
-        file_des.seek(offset, BOOST_IOS::beg);
+        m_file_des.seek(offset, BOOST_IOS::beg);
     }
     catch (const std::exception& ex)
     {
@@ -59,7 +54,7 @@ void boostfd_file::serve(const request* req) throw (io_error)
     {
         try
         {
-            std::streamsize rc = file_des.read((char*)buffer, bytes);
+            std::streamsize rc = m_file_des.read((char*)buffer, bytes);
             if (rc != std::streamsize(bytes)) {
                 STXXL_THROW_ERRNO(io_error, " partial read: " << rc << " missing " << (bytes - rc) << " out of " << bytes << " bytes");
             }
@@ -81,7 +76,7 @@ void boostfd_file::serve(const request* req) throw (io_error)
     {
         try
         {
-            std::streamsize rc = file_des.write((char*)buffer, bytes);
+            std::streamsize rc = m_file_des.write((char*)buffer, bytes);
             if (rc != std::streamsize(bytes)) {
                 STXXL_THROW_ERRNO(io_error, " partial write: " << rc << " missing " << (bytes - rc) << " out of " << bytes << " bytes");
             }
@@ -109,7 +104,10 @@ const char* boostfd_file::io_type() const
 boostfd_file::boostfd_file(
     const std::string& filename,
     int mode,
-    int queue_id, int allocator_id) : disk_queued_file(queue_id, allocator_id), mode_(mode)
+    int queue_id, int allocator_id, unsigned int device_id)
+    : file(device_id),
+      disk_queued_file(queue_id, allocator_id),
+      m_mode(mode)
 {
     BOOST_IOS::openmode boostfd_mode =
         (mode & RDWR) ? (BOOST_IOS::out | BOOST_IOS::in) :
@@ -165,38 +163,36 @@ boostfd_file::boostfd_file(
     }
 
 #if (BOOST_VERSION >= 104100)
-    file_des.open(filename, boostfd_mode);      // also compiles with earlier Boost versions, but differs semantically
+    m_file_des.open(filename, boostfd_mode);      // also compiles with earlier Boost versions, but differs semantically
 #else
-    file_des.open(filename, boostfd_mode, boostfd_mode);
+    m_file_des.open(filename, boostfd_mode, boostfd_mode);
 #endif
 }
 
 boostfd_file::~boostfd_file()
 {
-    scoped_mutex_lock fd_lock(fd_mutex);
-    file_des.close();
+    scoped_mutex_lock fd_lock(m_fd_mutex);
+    m_file_des.close();
 }
 
 inline file::offset_type boostfd_file::_size()
 {
-    return file_des.seek(0, BOOST_IOS::end);
+    return m_file_des.seek(0, BOOST_IOS::end);
 }
 
 file::offset_type boostfd_file::size()
 {
-    scoped_mutex_lock fd_lock(fd_mutex);
+    scoped_mutex_lock fd_lock(m_fd_mutex);
     return _size();
 }
 
 void boostfd_file::set_size(offset_type newsize)
 {
-    scoped_mutex_lock fd_lock(fd_mutex);
-#ifndef NDEBUG
+    scoped_mutex_lock fd_lock(m_fd_mutex);
     offset_type oldsize = _size();
-#endif // NDEBUG
-    file_des.seek(newsize, BOOST_IOS::beg);
-    file_des.seek(0, BOOST_IOS::beg); // not important ?
-    assert(_size() >= oldsize);
+    m_file_des.seek(newsize, BOOST_IOS::beg);
+    m_file_des.seek(0, BOOST_IOS::beg); // not important ?
+    STXXL_ASSERT(_size() >= oldsize);
 }
 
 void boostfd_file::lock()
diff --git a/lib/io/create_file.cpp b/lib/io/create_file.cpp
index 20ecc36..7add1fb 100644
--- a/lib/io/create_file.cpp
+++ b/lib/io/create_file.cpp
@@ -22,7 +22,6 @@
 #include <ostream>
 #include <stdexcept>
 
-
 STXXL_BEGIN_NAMESPACE
 
 file * create_file(const std::string& io_impl,
@@ -57,25 +56,37 @@ file * create_file(disk_config& cfg, int mode, int disk_allocator_id)
         break;
     }
 
+    // automatically enumerate disks as separate device ids
+
+    if (cfg.device_id == file::DEFAULT_DEVICE_ID)
+    {
+        cfg.device_id = config::get_instance()->get_next_device_id();
+    }
+    else
+    {
+        config::get_instance()->update_max_device_id(cfg.device_id);
+    }
+
     // *** Select fileio Implementation
 
     if (cfg.io_impl == "syscall")
     {
         ufs_file_base* result =
-            new syscall_file(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new syscall_file(cfg.path, mode, cfg.queue, disk_allocator_id,
+                             cfg.device_id);
         result->lock();
 
         // if marked as device but file is not -> throw!
         if (cfg.raw_device && !result->is_device())
         {
             delete result;
-            STXXL_THROW(io_error, "Disk " << cfg.path << " was expected to be raw block device, but it is a normal file!");
+            STXXL_THROW(io_error, "Disk " << cfg.path << " was expected to be "
+                        "a raw block device, but it is a normal file!");
         }
 
         // if is raw_device -> get size and remove some flags.
         if (result->is_device())
         {
-            // if device
             cfg.raw_device = true;
             cfg.size = result->size();
             cfg.autogrow = cfg.delete_on_exit = cfg.unlink_on_open = false;
@@ -89,21 +100,58 @@ file * create_file(disk_config& cfg, int mode, int disk_allocator_id)
     else if (cfg.io_impl == "fileperblock_syscall")
     {
         fileperblock_file<syscall_file>* result =
-            new fileperblock_file<syscall_file>(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new fileperblock_file<syscall_file>(cfg.path, mode, cfg.queue,
+                                                disk_allocator_id, cfg.device_id);
         result->lock();
         return result;
     }
     else if (cfg.io_impl == "memory")
     {
-        mem_file* result = new mem_file(cfg.queue, disk_allocator_id);
+        mem_file* result = new mem_file(cfg.queue, disk_allocator_id, cfg.device_id);
         result->lock();
         return result;
     }
+#if STXXL_HAVE_LINUXAIO_FILE
+    // linuxaio can have the desired queue length, specified as queue_length=?
+    else if (cfg.io_impl == "linuxaio")
+    {
+        // linuxaio_queue is a singleton.
+        cfg.queue = file::DEFAULT_LINUXAIO_QUEUE;
+
+        ufs_file_base* result =
+            new linuxaio_file(cfg.path, mode, cfg.queue, disk_allocator_id,
+                              cfg.device_id, cfg.queue_length);
+
+        result->lock();
+
+        // if marked as device but file is not -> throw!
+        if (cfg.raw_device && !result->is_device())
+        {
+            delete result;
+            STXXL_THROW(io_error, "Disk " << cfg.path << " was expected to be "
+                        "a raw block device, but it is a normal file!");
+        }
+
+        // if is raw_device -> get size and remove some flags.
+        if (result->is_device())
+        {
+            cfg.raw_device = true;
+            cfg.size = result->size();
+            cfg.autogrow = cfg.delete_on_exit = cfg.unlink_on_open = false;
+        }
+
+        if (cfg.unlink_on_open)
+            result->unlink();
+
+        return result;
+    }
+#endif
 #if STXXL_HAVE_MMAP_FILE
     else if (cfg.io_impl == "mmap")
     {
         ufs_file_base* result =
-            new mmap_file(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new mmap_file(cfg.path, mode, cfg.queue, disk_allocator_id,
+                          cfg.device_id);
         result->lock();
 
         if (cfg.unlink_on_open)
@@ -114,7 +162,8 @@ file * create_file(disk_config& cfg, int mode, int disk_allocator_id)
     else if (cfg.io_impl == "fileperblock_mmap")
     {
         fileperblock_file<mmap_file>* result =
-            new fileperblock_file<mmap_file>(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new fileperblock_file<mmap_file>(cfg.path, mode, cfg.queue,
+                                             disk_allocator_id, cfg.device_id);
         result->lock();
         return result;
     }
@@ -124,7 +173,8 @@ file * create_file(disk_config& cfg, int mode, int disk_allocator_id)
     {
         mode &= ~(file::DIRECT | file::REQUIRE_DIRECT);  // clear the DIRECT flag, this file is supposed to be on tmpfs
         ufs_file_base* result =
-            new sim_disk_file(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new sim_disk_file(cfg.path, mode, cfg.queue, disk_allocator_id,
+                              cfg.device_id);
         result->lock();
         return result;
     }
@@ -133,14 +183,16 @@ file * create_file(disk_config& cfg, int mode, int disk_allocator_id)
     else if (cfg.io_impl == "wincall")
     {
         wfs_file_base* result =
-            new wincall_file(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new wincall_file(cfg.path, mode, cfg.queue, disk_allocator_id,
+                             cfg.device_id);
         result->lock();
         return result;
     }
     else if (cfg.io_impl == "fileperblock_wincall")
     {
         fileperblock_file<wincall_file>* result =
-            new fileperblock_file<wincall_file>(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new fileperblock_file<wincall_file>(cfg.path, mode, cfg.queue,
+                                                disk_allocator_id, cfg.device_id);
         result->lock();
         return result;
     }
@@ -149,14 +201,16 @@ file * create_file(disk_config& cfg, int mode, int disk_allocator_id)
     else if (cfg.io_impl == "boostfd")
     {
         boostfd_file* result =
-            new boostfd_file(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new boostfd_file(cfg.path, mode, cfg.queue, disk_allocator_id,
+                             cfg.device_id);
         result->lock();
         return result;
     }
     else if (cfg.io_impl == "fileperblock_boostfd")
     {
         fileperblock_file<boostfd_file>* result =
-            new fileperblock_file<boostfd_file>(cfg.path, mode, cfg.queue, disk_allocator_id);
+            new fileperblock_file<boostfd_file>(cfg.path, mode, cfg.queue,
+                                                disk_allocator_id, cfg.device_id);
         result->lock();
         return result;
     }
@@ -167,7 +221,8 @@ file * create_file(disk_config& cfg, int mode, int disk_allocator_id)
         ufs_file_base* backend =
             new syscall_file(cfg.path, mode, -1, -1); // FIXME: ID
         wbtl_file* result =
-            new stxxl::wbtl_file(backend, 16 * 1024 * 1024, 2, cfg.queue, disk_allocator_id);
+            new stxxl::wbtl_file(backend, 16 * 1024 * 1024, 2, cfg.queue,
+                                 disk_allocator_id);
         result->lock();
 
         if (cfg.unlink_on_open)
diff --git a/lib/io/disk_queued_file.cpp b/lib/io/disk_queued_file.cpp
index 83ad416..3c5019c 100644
--- a/lib/io/disk_queued_file.cpp
+++ b/lib/io/disk_queued_file.cpp
@@ -19,10 +19,8 @@
 #include <stxxl/bits/namespace.h>
 #include <stxxl/bits/singleton.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-
 request_ptr disk_queued_file::aread(
     void* buffer,
     offset_type pos,
diff --git a/lib/io/fileperblock_file.cpp b/lib/io/fileperblock_file.cpp
index b3fd352..55f7a5f 100644
--- a/lib/io/fileperblock_file.cpp
+++ b/lib/io/fileperblock_file.cpp
@@ -28,6 +28,8 @@
 #include <stxxl/bits/io/file.h>
 #include <stxxl/bits/io/fileperblock_file.h>
 #include <stxxl/bits/io/mmap_file.h>
+#include <stxxl/bits/io/boostfd_file.h>
+#include <stxxl/bits/io/wincall_file.h>
 #include <stxxl/bits/io/request.h>
 #include <stxxl/bits/io/serving_request.h>
 #include <stxxl/bits/io/syscall_file.h>
@@ -37,7 +39,6 @@
 #include <stxxl/bits/verbose.h>
 #include "ufs_platform.h"
 
-
 STXXL_BEGIN_NAMESPACE
 
 template <class base_file_type>
@@ -45,8 +46,10 @@ fileperblock_file<base_file_type>::fileperblock_file(
     const std::string& filename_prefix,
     int mode,
     int queue_id,
-    int allocator_id)
-    : disk_queued_file(queue_id, allocator_id),
+    int allocator_id,
+    unsigned int device_id)
+    : file(device_id),
+      disk_queued_file(queue_id, allocator_id),
       filename_prefix(filename_prefix),
       mode(mode),
       current_size(0),
@@ -74,16 +77,12 @@ std::string fileperblock_file<base_file_type>::filename_for_block(offset_type of
 }
 
 template <class base_file_type>
-void fileperblock_file<base_file_type>::serve(const request* req) throw (io_error)
+void fileperblock_file<base_file_type>::serve(void* buffer, offset_type offset,
+                                              size_type bytes, request::request_type type)
 {
-    assert(req->get_file() == this);
-
-    base_file_type base_file(filename_for_block(req->get_offset()), mode, get_queue_id());
-    base_file.set_size(req->get_size());
-
-    request_ptr derived(new serving_request(default_completion_handler(), &base_file, req->get_buffer(), 0, req->get_size(), req->get_type()));
-    request_ptr dummy = derived;
-    derived->serve();
+    base_file_type base_file(filename_for_block(offset), mode, get_queue_id());
+    base_file.set_size(bytes);
+    base_file.serve(buffer, 0, bytes, type);
 }
 
 template <class base_file_type>
@@ -92,15 +91,15 @@ void fileperblock_file<base_file_type>::lock()
     if (!lock_file_created)
     {
         //create lock file and fill it with one page, an empty file cannot be locked
-        const int page_size = BLOCK_ALIGN;
-        void* one_page = aligned_alloc<BLOCK_ALIGN>(page_size);
+        const int page_size = STXXL_BLOCK_ALIGN;
+        void* one_page = aligned_alloc<STXXL_BLOCK_ALIGN>(page_size);
 #if STXXL_WITH_VALGRIND
         memset(one_page, 0, page_size);
 #endif
         lock_file.set_size(page_size);
-        request_ptr r = lock_file.awrite(one_page, 0, page_size, default_completion_handler());
+        request_ptr r = lock_file.awrite(one_page, 0, page_size);
         r->wait();
-        aligned_dealloc<BLOCK_ALIGN>(one_page);
+        aligned_dealloc<STXXL_BLOCK_ALIGN>(one_page);
         lock_file_created = true;
     }
     lock_file.lock();
diff --git a/lib/io/iostats.cpp b/lib/io/iostats.cpp
index c7982dc..e9ad23e 100644
--- a/lib/io/iostats.cpp
+++ b/lib/io/iostats.cpp
@@ -26,37 +26,37 @@
 
 STXXL_BEGIN_NAMESPACE
 
-stats::stats() :
-    reads(0),
-    writes(0),
-    volume_read(0),
-    volume_written(0),
-    c_reads(0),
-    c_writes(0),
-    c_volume_read(0),
-    c_volume_written(0),
-    t_reads(0.0),
-    t_writes(0.0),
-    p_reads(0.0),
-    p_writes(0.0),
-    p_begin_read(0.0),
-    p_begin_write(0.0),
-    p_ios(0.0),
-    p_begin_io(0.0),
-    t_waits(0.0),
-    p_waits(0.0),
-    p_begin_wait(0.0),
-    t_wait_read(0.0),
-    p_wait_read(0.0),
-    p_begin_wait_read(0.0),
-    t_wait_write(0.0),
-    p_wait_write(0.0),
-    p_begin_wait_write(0.0),
-    acc_reads(0), acc_writes(0),
-    acc_ios(0),
-    acc_waits(0),
-    acc_wait_read(0), acc_wait_write(0),
-    last_reset(timestamp())
+stats::stats()
+    : reads(0),
+      writes(0),
+      volume_read(0),
+      volume_written(0),
+      c_reads(0),
+      c_writes(0),
+      c_volume_read(0),
+      c_volume_written(0),
+      t_reads(0.0),
+      t_writes(0.0),
+      p_reads(0.0),
+      p_writes(0.0),
+      p_begin_read(0.0),
+      p_begin_write(0.0),
+      p_ios(0.0),
+      p_begin_io(0.0),
+      t_waits(0.0),
+      p_waits(0.0),
+      p_begin_wait(0.0),
+      t_wait_read(0.0),
+      p_wait_read(0.0),
+      p_begin_wait_read(0.0),
+      t_wait_write(0.0),
+      p_wait_write(0.0),
+      p_begin_wait_write(0.0),
+      acc_reads(0), acc_writes(0),
+      acc_ios(0),
+      acc_waits(0),
+      acc_wait_read(0), acc_wait_write(0),
+      last_reset(timestamp())
 { }
 
 #ifndef STXXL_IO_STATS_RESET_FORBIDDEN
@@ -360,10 +360,10 @@ std::ostream& operator << (std::ostream& o, const stats_data& s)
       << hr(s.get_reads() ? s.get_read_volume() / s.get_reads() : 0, "B") << std::endl;
     o << " number of bytes read from disks            : " << hr(s.get_read_volume(), "B") << std::endl;
     o << " time spent in serving all read requests    : " << s.get_read_time() << " s"
-      << " @ " << (s.get_read_volume() / 1048576.0 / s.get_read_time()) << " MiB/s"
+      << " @ " << ((double)s.get_read_volume() / 1048576.0 / s.get_read_time()) << " MiB/s"
       << std::endl;
     o << " time spent in reading (parallel read time) : " << s.get_pread_time() << " s"
-      << " @ " << (s.get_read_volume() / 1048576.0 / s.get_pread_time()) << " MiB/s"
+      << " @ " << ((double)s.get_read_volume() / 1048576.0 / s.get_pread_time()) << " MiB/s"
       << std::endl;
     if (s.get_cached_reads()) {
         o << " total number of cached reads               : " << hr(s.get_cached_reads()) << std::endl;
@@ -380,13 +380,13 @@ std::ostream& operator << (std::ostream& o, const stats_data& s)
       << hr(s.get_writes() ? s.get_written_volume() / s.get_writes() : 0, "B") << std::endl;
     o << " number of bytes written to disks           : " << hr(s.get_written_volume(), "B") << std::endl;
     o << " time spent in serving all write requests   : " << s.get_write_time() << " s"
-      << " @ " << (s.get_written_volume() / 1048576.0 / s.get_write_time()) << " MiB/s"
+      << " @ " << ((double)s.get_written_volume() / 1048576.0 / s.get_write_time()) << " MiB/s"
       << std::endl;
     o << " time spent in writing (parallel write time): " << s.get_pwrite_time() << " s"
-      << " @ " << (s.get_written_volume() / 1048576.0 / s.get_pwrite_time()) << " MiB/s"
+      << " @ " << ((double)s.get_written_volume() / 1048576.0 / s.get_pwrite_time()) << " MiB/s"
       << std::endl;
     o << " time spent in I/O (parallel I/O time)      : " << s.get_pio_time() << " s"
-      << " @ " << ((s.get_read_volume() + s.get_written_volume()) / 1048576.0 / s.get_pio_time()) << " MiB/s"
+      << " @ " << ((double)(s.get_read_volume() + s.get_written_volume()) / 1048576.0 / s.get_pio_time()) << " MiB/s"
       << std::endl;
 #else
     o << " n/a" << std::endl;
diff --git a/lib/io/linuxaio_file.cpp b/lib/io/linuxaio_file.cpp
new file mode 100644
index 0000000..303df59
--- /dev/null
+++ b/lib/io/linuxaio_file.cpp
@@ -0,0 +1,66 @@
+/***************************************************************************
+ *  lib/io/linuxaio_file.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Johannes Singler <singler at kit.edu>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <stxxl/bits/io/linuxaio_file.h>
+
+#if STXXL_HAVE_LINUXAIO_FILE
+
+#include <stxxl/bits/io/linuxaio_request.h>
+#include <stxxl/bits/io/disk_queues.h>
+
+STXXL_BEGIN_NAMESPACE
+
+request_ptr linuxaio_file::aread(
+    void* buffer,                       // memory handed to the READ request
+    offset_type pos,                    // file offset of the transfer
+    size_type bytes,                    // number of bytes to transfer
+    const completion_handler& on_cmpl)  // handler passed on to the request
+{
+    request_ptr req(new linuxaio_request(on_cmpl, this, buffer, pos, bytes, request::READ));
+    // enqueue the new READ request on the disk queue selected by this file's queue id
+    disk_queues::get_instance()->add_request(req, get_queue_id());
+    // hand the request back so the caller can e.g. wait() on it (as serve() does)
+    return req;
+}
+
+request_ptr linuxaio_file::awrite(
+    void* buffer,                       // memory handed to the WRITE request
+    offset_type pos,                    // file offset of the transfer
+    size_type bytes,                    // number of bytes to transfer
+    const completion_handler& on_cmpl)  // handler passed on to the request
+{
+    request_ptr req(new linuxaio_request(on_cmpl, this, buffer, pos, bytes, request::WRITE));
+    // enqueue the new WRITE request on the disk queue selected by this file's queue id
+    disk_queues::get_instance()->add_request(req, get_queue_id());
+    // hand the request back so the caller can e.g. wait() on it (as serve() does)
+    return req;
+}
+
+void linuxaio_file::serve(void* buffer, offset_type offset, size_type bytes,
+                          request::request_type type)
+{
+    // synchronous transfer: issue the matching asynchronous request, then wait() on it
+    if (type == request::READ)
+        aread(buffer, offset, bytes)->wait();
+    else
+        awrite(buffer, offset, bytes)->wait();
+}
+
+const char* linuxaio_file::io_type() const
+{
+    return "linuxaio";  // constant tag reported for files of this class
+}
+
+STXXL_END_NAMESPACE
+
+#endif // #if STXXL_HAVE_LINUXAIO_FILE
+// vim: et:ts=4:sw=4
diff --git a/lib/io/linuxaio_queue.cpp b/lib/io/linuxaio_queue.cpp
new file mode 100644
index 0000000..e3610c3
--- /dev/null
+++ b/lib/io/linuxaio_queue.cpp
@@ -0,0 +1,284 @@
+/***************************************************************************
+ *  lib/io/linuxaio_queue.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Johannes Singler <singler at kit.edu>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <stxxl/bits/io/linuxaio_queue.h>
+
+#if STXXL_HAVE_LINUXAIO_FILE
+
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include <stxxl/bits/verbose.h>
+#include <stxxl/bits/mng/block_manager.h>
+#include <stxxl/bits/common/error_handling.h>
+#include <stxxl/bits/io/linuxaio_request.h>
+#include <stxxl/bits/io/linuxaio_queue.h>
+
+#include <algorithm>
+
+#ifndef STXXL_CHECK_FOR_PENDING_REQUESTS_ON_SUBMISSION
+#define STXXL_CHECK_FOR_PENDING_REQUESTS_ON_SUBMISSION 1
+#endif
+
+STXXL_BEGIN_NAMESPACE
+
+linuxaio_queue::linuxaio_queue(int desired_queue_length)
+    : num_waiting_requests(0), num_free_events(0), num_posted_requests(0),
+      post_thread_state(NOT_RUNNING), wait_thread_state(NOT_RUNNING)
+{
+    if (desired_queue_length == 0) {
+        // default value, 64 entries per queue (i.e. usually per disk) should
+        // be enough
+        max_events = 64;
+    }
+    else
+        max_events = desired_queue_length;
+
+    // negotiate maximum number of simultaneous events with the OS; on EAGAIN retry with fewer
+    context = 0;
+    long result;
+    while ((result = syscall(SYS_io_setup, max_events, &context)) == -1 &&
+           errno == EAGAIN && max_events > 1)
+    {
+        max_events >>= 1;               // try with half as many events (loop guard stops at 1)
+    }
+    if (result != 0) {
+        STXXL_THROW_ERRNO(io_error, "linuxaio_queue::linuxaio_queue"
+                          " io_setup() nr_events=" << max_events);
+    }
+    // one increment per negotiated event slot; post_requests() decrements before each post
+    for (int e = 0; e < max_events; ++e)
+        num_free_events++;  // cannot set semaphore to value directly
+
+    STXXL_MSG("Set up an linuxaio queue with " << max_events << " entries.");
+    // start the posting thread (post_async) and the waiting thread (wait_async) on this queue
+    start_thread(post_async, static_cast<void*>(this), post_thread, post_thread_state);
+    start_thread(wait_async, static_cast<void*>(this), wait_thread, wait_thread_state);
+}
+
+linuxaio_queue::~linuxaio_queue()
+{
+    stop_thread(post_thread, post_thread_state, num_waiting_requests);  // posting thread first
+    stop_thread(wait_thread, wait_thread_state, num_posted_requests);   // then the waiting thread
+    syscall(SYS_io_destroy, context);  // release the AIO context set up by io_setup() in the constructor
+}
+
+void linuxaio_queue::add_request(request_ptr& req)
+{
+    if (req.empty())
+        STXXL_THROW_INVALID_ARGUMENT("Empty request submitted to disk_queue.");
+    if (post_thread_state() != RUNNING)
+        STXXL_ERRMSG("Request submitted to stopped queue.");
+    if (!dynamic_cast<linuxaio_request*>(req.get()))
+        STXXL_ERRMSG("Non-LinuxAIO request submitted to LinuxAIO queue.");
+    // NOTE(review): the two STXXL_ERRMSG checks presumably only report; the request is queued anyway
+    scoped_mutex_lock lock(waiting_mtx);
+    // append under waiting_mtx, then bump the counter that post_requests() decrements
+    waiting_requests.push_back(req);
+    num_waiting_requests++;
+}
+
+bool linuxaio_queue::cancel_request(request_ptr& req)
+{
+    if (req.empty())
+        STXXL_THROW_INVALID_ARGUMENT("Empty request canceled disk_queue.");
+    if (post_thread_state() != RUNNING)
+        STXXL_ERRMSG("Request canceled in stopped queue.");
+    if (!dynamic_cast<linuxaio_request*>(req.get()))
+        STXXL_ERRMSG("Non-LinuxAIO request submitted to LinuxAIO queue.");
+    // returns true if req was removed from the waiting or the posted list, false otherwise
+    queue_type::iterator pos;
+    {
+        scoped_mutex_lock lock(waiting_mtx);
+        // case 1: the request is still waiting to be posted
+        pos = std::find(waiting_requests.begin(), waiting_requests.end(),
+                        req _STXXL_FORCE_SEQUENTIAL);
+        if (pos != waiting_requests.end())
+        {
+            waiting_requests.erase(pos);
+            // pos is invalid after erase(): go through req, the element std::find matched
+            // polymorphic_downcast to linuxaio_request,
+            // request is canceled, but was not yet posted.
+            dynamic_cast<linuxaio_request*>(req.get())->completed(false, true);
+
+            num_waiting_requests--; // will never block
+            return true;
+        }
+    }
+
+    scoped_mutex_lock lock(posted_mtx);
+    // case 2: the request was already posted, try to cancel it via cancel_aio()
+    pos = std::find(posted_requests.begin(), posted_requests.end(),
+                    req _STXXL_FORCE_SEQUENTIAL);
+    if (pos != posted_requests.end())
+    {
+        // polymorphic_downcast to linuxaio_request,
+        bool canceled_io_operation = (dynamic_cast<linuxaio_request*>(req.get()))->cancel_aio();
+
+        if (canceled_io_operation)
+        {
+            posted_requests.erase(pos);
+            // pos is invalid after erase(): go through req, the element std::find matched
+            // polymorphic_downcast to linuxaio_request,
+            // NOTE(review): cancel_aio() already ran handle_events() for this request - confirm counters
+            // request is canceled, already posted
+            dynamic_cast<linuxaio_request*>(req.get())->completed(true, true);
+
+            num_free_events++;
+            num_posted_requests--; // will never block
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// internal routines, run by the posting thread
+void linuxaio_queue::post_requests()
+{
+    request_ptr req;
+    io_event* events = new io_event[max_events];  // buffer for io_getevents() in the retry path
+    // loop body: take one request off waiting_requests and submit it via post()
+    for ( ; ; ) // as long as thread is running
+    {
+        // might block until next request or message comes in
+        int num_currently_waiting_requests = num_waiting_requests--;
+
+        // terminate if termination has been requested
+        if (post_thread_state() == TERMINATING && num_currently_waiting_requests == 0)
+            break;
+        // dequeue under waiting_mtx; the lock is released before the possibly blocking steps
+        scoped_mutex_lock lock(waiting_mtx);
+        if (!waiting_requests.empty())
+        {
+            req = waiting_requests.front();
+            waiting_requests.pop_front();
+            lock.unlock();
+
+            num_free_events--; // might block because too many requests are posted
+
+            // polymorphic_downcast
+            while (!dynamic_cast<linuxaio_request*>(req.get())->post())
+            {
+                // post failed, so first handle events to make queues (more)
+                // empty, then try again.
+
+                // wait for at least one event to complete, no time limit
+                long num_events = syscall(SYS_io_getevents, context, 1, max_events, events, NULL);
+                if (num_events < 0) {
+                    STXXL_THROW_ERRNO(io_error, "linuxaio_queue::post_requests"
+                                      " io_getevents() nr_events=" << num_events);
+                }
+
+                handle_events(events, num_events, false);
+            }
+
+            // request is finally posted
+            // record it in posted_requests (this inner lock shadows the outer, already released one)
+            {
+                scoped_mutex_lock lock(posted_mtx);
+                posted_requests.push_back(req);
+                num_posted_requests++;
+            }
+        }
+        else
+        {
+            lock.unlock();
+
+            // num_waiting_requests-- was premature, compensate for that
+            num_waiting_requests++;
+        }
+    }
+    // NOTE(review): not reached if STXXL_THROW_ERRNO above throws, so events would leak - confirm
+    delete[] events;
+}
+
+void linuxaio_queue::handle_events(io_event* events, long num_events, bool canceled)
+{
+    for (int e = 0; e < num_events; ++e)
+    {
+        // data holds the request_ptr* from fill_control_block(); unsigned_type is as long as a pointer, and like this, we avoid an icpc warning
+        request_ptr* r = reinterpret_cast<request_ptr*>(static_cast<unsigned_type>(events[e].data));
+        r->get()->completed(canceled);
+        delete r;              // free the heap-allocated request_ptr and the reference it held
+        num_free_events++;     // the event slot of this request can be reused
+        num_posted_requests--; // will never block
+    }
+}
+
+// internal routines, run by the waiting thread
+void linuxaio_queue::wait_requests()
+{
+    request_ptr req;  // NOTE(review): never used in this function
+    io_event* events = new io_event[max_events];  // receives up to max_events completions per call
+    // loop body: collect completion events from the kernel and pass them to handle_events()
+    for ( ; ; ) // as long as thread is running
+    {
+        // might block until next request is posted or message comes in
+        int num_currently_posted_requests = num_posted_requests--;
+
+        // terminate if termination has been requested
+        if (wait_thread_state() == TERMINATING && num_currently_posted_requests == 0)
+            break;
+
+        // wait for at least one of them to finish
+        long num_events = syscall(SYS_io_getevents, context, 1, max_events, events, NULL);
+        if (num_events < 0) {
+            STXXL_THROW_ERRNO(io_error, "linuxaio_queue::wait_requests"
+                              " io_getevents() nr_events=" << max_events);
+        }
+
+        num_posted_requests++; // compensate for the one eaten prematurely above
+        // handle_events() decrements num_posted_requests once per completed event
+        handle_events(events, num_events, false);
+    }
+    // NOTE(review): not reached if STXXL_THROW_ERRNO above throws, so events would leak - confirm
+    delete[] events;
+}
+
+void* linuxaio_queue::post_async(void* arg)
+{
+    (static_cast<linuxaio_queue*>(arg))->post_requests();  // arg is the queue passed to start_thread()
+    // post_requests() has returned, i.e. its loop ended: record TERMINATED for the posting thread
+    self_type* pthis = static_cast<self_type*>(arg);
+    pthis->post_thread_state.set_to(TERMINATED);
+
+#if STXXL_STD_THREADS && STXXL_MSVC >= 1700
+    // Workaround for deadlock bug in Visual C++ Runtime 2012 and 2013, see
+    // request_queue_impl_worker.cpp. -tb
+    ExitThread(NULL);
+#else
+    return NULL;
+#endif
+}
+
+void* linuxaio_queue::wait_async(void* arg)
+{
+    (static_cast<linuxaio_queue*>(arg))->wait_requests();  // arg is the queue passed to start_thread()
+    // wait_requests() has returned, i.e. its loop ended: record TERMINATED for the waiting thread
+    self_type* pthis = static_cast<self_type*>(arg);
+    pthis->wait_thread_state.set_to(TERMINATED);
+
+#if STXXL_STD_THREADS && STXXL_MSVC >= 1700
+    // Workaround for deadlock bug in Visual C++ Runtime 2012 and 2013, see
+    // request_queue_impl_worker.cpp. -tb
+    ExitThread(NULL);
+#else
+    return NULL;
+#endif
+}
+
+STXXL_END_NAMESPACE
+
+#endif // #if STXXL_HAVE_LINUXAIO_FILE
+// vim: et:ts=4:sw=4
diff --git a/lib/io/linuxaio_request.cpp b/lib/io/linuxaio_request.cpp
new file mode 100644
index 0000000..464acc6
--- /dev/null
+++ b/lib/io/linuxaio_request.cpp
@@ -0,0 +1,129 @@
+/***************************************************************************
+ *  lib/io/linuxaio_request.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Johannes Singler <singler at kit.edu>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <stxxl/bits/io/linuxaio_request.h>
+
+#if STXXL_HAVE_LINUXAIO_FILE
+
+#include <stxxl/bits/io/disk_queues.h>
+#include <stxxl/bits/verbose.h>
+#include <stxxl/bits/common/error_handling.h>
+
+#include <unistd.h>
+#include <sys/syscall.h>
+
+STXXL_BEGIN_NAMESPACE
+
+void linuxaio_request::completed(bool posted, bool canceled)
+{
+    STXXL_VERBOSE_LINUXAIO("linuxaio_request[" << this << "] completed(" <<
+                           posted << "," << canceled << ")");
+    // statistics: "finished" when not canceled, "canceled" (with m_bytes) only if it had been posted
+    if (!canceled)
+    {
+        if (m_type == READ)
+            stats::get_instance()->read_finished();
+        else
+            stats::get_instance()->write_finished();
+    }
+    else if (posted)
+    {
+        if (m_type == READ)
+            stats::get_instance()->read_canceled(m_bytes);
+        else
+            stats::get_instance()->write_canceled(m_bytes);
+    }
+    request_with_state::completed(canceled);  // then delegate to the base-class implementation
+}
+
+void linuxaio_request::fill_control_block()
+{
+    linuxaio_file* af = dynamic_cast<linuxaio_file*>(m_file);  // NOTE(review): used below without a null check
+    // start from an all-zero control block, then fill in the fields describing this request
+    memset(&cb, 0, sizeof(cb));
+    // indirection, so the I/O system retains a counting_ptr reference
+    cb.aio_data = reinterpret_cast<__u64>(new request_ptr(this));  // deleted again in handle_events()
+    cb.aio_fildes = af->file_des;
+    cb.aio_lio_opcode = (m_type == READ) ? IOCB_CMD_PREAD : IOCB_CMD_PWRITE;
+    cb.aio_reqprio = 0;
+    cb.aio_buf = static_cast<__u64>((unsigned long)(m_buffer));
+    cb.aio_nbytes = m_bytes;
+    cb.aio_offset = m_offset;
+}
+
+//! Submits an I/O request to the OS
+//! \returns true if io_submit() accepted the request, false otherwise (e.g. on EAGAIN)
+bool linuxaio_request::post()
+{
+    STXXL_VERBOSE_LINUXAIO("linuxaio_request[" << this << "] post()");
+    // NOTE(review): each call allocates a new request_ptr; on a failed submit it appears to leak
+    fill_control_block();
+    iocb* cb_pointer = &cb;
+    // io_submit might take considerable time, so we have to remember the current
+    // time before the call.
+    double now = timestamp();
+    linuxaio_queue* queue = dynamic_cast<linuxaio_queue*>(
+        disk_queues::get_instance()->get_queue(m_file->get_queue_id())
+        );
+    long success = syscall(SYS_io_submit, queue->get_io_context(), 1, &cb_pointer);
+    if (success == 1)
+    {
+        if (m_type == READ)
+            stats::get_instance()->read_started(m_bytes, now);
+        else
+            stats::get_instance()->write_started(m_bytes, now);
+    }
+    else if (success == -1 && errno != EAGAIN)
+        STXXL_THROW_ERRNO(io_error, "linuxaio_request::post"
+                          " io_submit()");
+    // any result other than exactly one submitted control block is reported as failure
+    return success == 1;
+}
+
+//! Cancel the request
+//! \returns the result of linuxaio_queue::cancel_request(), or false if m_file is not set
+//! Routine is called by user, as part of the request interface.
+bool linuxaio_request::cancel()
+{
+    STXXL_VERBOSE_LINUXAIO("linuxaio_request[" << this << "] cancel()");
+    // without a file there is no queue to look the request up in
+    if (!m_file) return false;
+    // wrap this in a request_ptr, since cancel_request() takes a request_ptr&
+    request_ptr req(this);
+    linuxaio_queue* queue = dynamic_cast<linuxaio_queue*>(
+        disk_queues::get_instance()->get_queue(m_file->get_queue_id())
+        );  // NOTE(review): cast result is used without a null check
+    return queue->cancel_request(req);
+}
+
+//! Cancel already posted request
+bool linuxaio_request::cancel_aio()
+{
+    STXXL_VERBOSE_LINUXAIO("linuxaio_request[" << this << "] cancel_aio()");
+    // without a file there is no queue whose AIO context could be used
+    if (!m_file) return false;
+    // event is filled in by io_cancel() and handed to handle_events() on success
+    io_event event;
+    linuxaio_queue* queue = dynamic_cast<linuxaio_queue*>(
+        disk_queues::get_instance()->get_queue(m_file->get_queue_id())
+        );  // NOTE(review): cast result is used without a null check
+    long result = syscall(SYS_io_cancel, queue->get_io_context(), &cb, &event);
+    if (result == 0)    //successfully canceled
+        queue->handle_events(&event, 1, true);
+    return result == 0;  // true only if the kernel reported a successful cancel
+}
+
+STXXL_END_NAMESPACE
+
+#endif // #if STXXL_HAVE_LINUXAIO_FILE
+// vim: et:ts=4:sw=4
diff --git a/lib/io/mem_file.cpp b/lib/io/mem_file.cpp
index 810239e..065ee50 100644
--- a/lib/io/mem_file.cpp
+++ b/lib/io/mem_file.cpp
@@ -4,7 +4,7 @@
  *  Part of the STXXL. See http://stxxl.sourceforge.net
  *
  *  Copyright (C) 2008 Andreas Beckmann <beckmann at cs.uni-frankfurt.de>
- *  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+ *  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
  *
  *  Distributed under the Boost Software License, Version 1.0.
  *  (See accompanying file LICENSE_1_0.txt or copy at
@@ -18,29 +18,22 @@
 #include <stxxl/bits/io/mem_file.h>
 #include <stxxl/bits/io/iostats.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-
-void mem_file::serve(const request* req) throw (io_error)
+void mem_file::serve(void* buffer, offset_type offset, size_type bytes,
+                     request::request_type type)
 {
     scoped_mutex_lock lock(m_mutex);
 
-    assert(req->get_file() == this);
-    offset_type offset = req->get_offset();
-    void* buffer = req->get_buffer();
-    size_type bytes = req->get_size();
-    request::request_type type = req->get_type();
-
     if (type == request::READ)
     {
         stats::scoped_read_timer read_timer(bytes);
-        memcpy(buffer, ptr + offset, bytes);
+        memcpy(buffer, m_ptr + offset, bytes);
     }
     else
     {
         stats::scoped_write_timer write_timer(bytes);
-        memcpy(ptr + offset, buffer, bytes);
+        memcpy(m_ptr + offset, buffer, bytes);
     }
 }
 
@@ -51,8 +44,8 @@ const char* mem_file::io_type() const
 
 mem_file::~mem_file()
 {
-    free(ptr);
-    ptr = NULL;
+    free(m_ptr);
+    m_ptr = NULL;
 }
 
 void mem_file::lock()
@@ -62,7 +55,7 @@ void mem_file::lock()
 
 file::offset_type mem_file::size()
 {
-    return sz;
+    return m_size;
 }
 
 void mem_file::set_size(offset_type newsize)
@@ -70,8 +63,8 @@ void mem_file::set_size(offset_type newsize)
     scoped_mutex_lock lock(m_mutex);
     assert(newsize <= std::numeric_limits<offset_type>::max());
 
-    ptr = (char*)realloc(ptr, (size_t)newsize);
-    sz = newsize;
+    m_ptr = (char*)realloc(m_ptr, (size_t)newsize);
+    m_size = newsize;
 }
 
 void mem_file::discard(offset_type offset, offset_type size)
@@ -80,15 +73,15 @@ void mem_file::discard(offset_type offset, offset_type size)
 #ifndef STXXL_MEMFILE_DONT_CLEAR_FREED_MEMORY
     // overwrite the freed region with uninitialized memory
     STXXL_VERBOSE("discard at " << offset << " len " << size);
-    void* uninitialized = malloc(BLOCK_ALIGN);
-    while (size >= BLOCK_ALIGN) {
-        memcpy(ptr + offset, uninitialized, BLOCK_ALIGN);
-        offset += BLOCK_ALIGN;
-        size -= BLOCK_ALIGN;
+    void* uninitialized = malloc(STXXL_BLOCK_ALIGN);
+    while (size >= STXXL_BLOCK_ALIGN) {
+        memcpy(m_ptr + offset, uninitialized, STXXL_BLOCK_ALIGN);
+        offset += STXXL_BLOCK_ALIGN;
+        size -= STXXL_BLOCK_ALIGN;
     }
     assert(size <= std::numeric_limits<offset_type>::max());
     if (size > 0)
-        memcpy(ptr + offset, uninitialized, (size_t)size);
+        memcpy(m_ptr + offset, uninitialized, (size_t)size);
     free(uninitialized);
 #else
     STXXL_UNUSED(offset);
diff --git a/lib/io/mmap_file.cpp b/lib/io/mmap_file.cpp
index 8e0b3e2..20b3f53 100644
--- a/lib/io/mmap_file.cpp
+++ b/lib/io/mmap_file.cpp
@@ -20,18 +20,12 @@
 #include "ufs_platform.h"
 #include <sys/mman.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-
-void mmap_file::serve(const request* req) throw (io_error)
+void mmap_file::serve(void* buffer, offset_type offset, size_type bytes,
+                      request::request_type type)
 {
     scoped_mutex_lock fd_lock(fd_mutex);
-    assert(req->get_file() == this);
-    offset_type offset = req->get_offset();
-    void* buffer = req->get_buffer();
-    size_type bytes = req->get_size();
-    request::request_type type = req->get_type();
 
     //assert(offset + bytes <= _size());
 
diff --git a/lib/io/request.cpp b/lib/io/request.cpp
index 58fc03b..6fd4818 100644
--- a/lib/io/request.cpp
+++ b/lib/io/request.cpp
@@ -16,77 +16,74 @@
 #include <stxxl/bits/io/request.h>
 #include <stxxl/bits/io/file.h>
 
-
 STXXL_BEGIN_NAMESPACE
 
-request::request(const completion_handler& on_compl,
-                 file* file__,
-                 void* buffer_,
-                 offset_type offset_,
-                 size_type bytes_,
-                 request_type type_) :
-    on_complete(on_compl),
-    file_(file__),
-    buffer(buffer_),
-    offset(offset_),
-    bytes(bytes_),
-    type(type_)
+request::request(
+    const completion_handler& on_compl,
+    file* file,
+    void* buffer,
+    offset_type offset,
+    size_type bytes,
+    request_type type)
+    : m_on_complete(on_compl),
+      m_file(file),
+      m_buffer(buffer),
+      m_offset(offset),
+      m_bytes(bytes),
+      m_type(type)
 {
-    STXXL_VERBOSE3("[" << static_cast<void*>(this) << "] request::(...), ref_cnt=" << get_reference_count());
-    file_->add_request_ref();
+    STXXL_VERBOSE3_THIS("request::(...), ref_cnt=" << get_reference_count());
+    m_file->add_request_ref();
 }
 
 request::~request()
 {
-    STXXL_VERBOSE3("[" << static_cast<void*>(this) << "] request::~(), ref_cnt=" << get_reference_count());
-}
-
-void request::completed()
-{
-    on_complete(this);
-    notify_waiters();
-    file_->delete_request_ref();
-    file_ = 0;
+    STXXL_VERBOSE3_THIS("request::~request(), ref_cnt=" << get_reference_count());
 }
 
 void request::check_alignment() const
 {
-    if (offset % BLOCK_ALIGN != 0)
+    if (m_offset % STXXL_BLOCK_ALIGN != 0)
         STXXL_ERRMSG("Offset is not aligned: modulo " <<
-                     BLOCK_ALIGN << " = " << offset % BLOCK_ALIGN);
+                     STXXL_BLOCK_ALIGN << " = " << m_offset % STXXL_BLOCK_ALIGN);
 
-    if (bytes % BLOCK_ALIGN != 0)
+    if (m_bytes % STXXL_BLOCK_ALIGN != 0)
         STXXL_ERRMSG("Size is not a multiple of " <<
-                     BLOCK_ALIGN << ", = " << bytes % BLOCK_ALIGN);
+                     STXXL_BLOCK_ALIGN << ", = " << m_bytes % STXXL_BLOCK_ALIGN);
 
-    if (unsigned_type(buffer) % BLOCK_ALIGN != 0)
+    if (unsigned_type(m_buffer) % STXXL_BLOCK_ALIGN != 0)
         STXXL_ERRMSG("Buffer is not aligned: modulo " <<
-                     BLOCK_ALIGN << " = " << unsigned_type(buffer) % BLOCK_ALIGN <<
-                     " (" << buffer << ")");
+                     STXXL_BLOCK_ALIGN << " = " << unsigned_type(m_buffer) % STXXL_BLOCK_ALIGN <<
+                     " (" << m_buffer << ")");
 }
 
 void request::check_nref_failed(bool after)
 {
     STXXL_ERRMSG("WARNING: serious error, reference to the request is lost " <<
-                 (after ? "after " : "before") << " serve" <<
+                 (after ? "after" : "before") << " serve()" <<
                  " nref=" << get_reference_count() <<
                  " this=" << this <<
-                 " offset=" << offset <<
-                 " buffer=" << buffer <<
-                 " bytes=" << bytes <<
-                 " type=" << ((type == READ) ? "READ" : "WRITE") <<
-                 " file=" << get_file() <<
-                 " iotype=" << get_file()->io_type()
+                 " offset=" << m_offset <<
+                 " buffer=" << m_buffer <<
+                 " bytes=" << m_bytes <<
+                 " type=" << ((m_type == READ) ? "READ" : "WRITE") <<
+                 " file=" << m_file <<
+                 " iotype=" << m_file->io_type()
                  );
 }
 
+const char* request::io_type() const
+{
+    return m_file->io_type();
+}
+
 std::ostream& request::print(std::ostream& out) const
 {
-    out << "File object address: " << static_cast<void*>(get_file());
-    out << " Buffer address: " << static_cast<void*>(get_buffer());
-    out << " File offset: " << get_offset();
-    out << " Transfer size: " << get_size() << " bytes";
-    out << " Type of transfer: " << ((get_type() == READ) ? "READ" : "WRITE");
+    out << "File object address: " << static_cast<void*>(m_file);
+    out << " Buffer address: " << static_cast<void*>(m_buffer);
+    out << " File offset: " << m_offset;
+    out << " Transfer size: " << m_bytes << " bytes";
+    out << " Type of transfer: " << ((m_type == READ) ? "READ" : "WRITE");
     return out;
 }
 
diff --git a/lib/io/request_queue_impl_1q.cpp b/lib/io/request_queue_impl_1q.cpp
index 581e31d..554978a 100644
--- a/lib/io/request_queue_impl_1q.cpp
+++ b/lib/io/request_queue_impl_1q.cpp
@@ -18,7 +18,7 @@
 #include <stxxl/bits/config.h>
 #include <stxxl/bits/common/error_handling.h>
 #include <stxxl/bits/io/request_queue_impl_1q.h>
-#include <stxxl/bits/io/request_with_state.h>
+#include <stxxl/bits/io/serving_request.h>
 #include <stxxl/bits/parallel.h>
 
 #if STXXL_STD_THREADS && STXXL_MSVC >= 1700
@@ -44,10 +44,10 @@ struct file_offset_match : public std::binary_function<request_ptr, request_ptr,
 };
 
 request_queue_impl_1q::request_queue_impl_1q(int n)
-    : m_thread_state(NOT_RUNNING), sem(0)
+    : m_thread_state(NOT_RUNNING), m_sem(0)
 {
     STXXL_UNUSED(n);
-    start_thread(worker, static_cast<void*>(this), thread, m_thread_state);
+    start_thread(worker, static_cast<void*>(this), m_thread, m_thread_state);
 }
 
 void request_queue_impl_1q::add_request(request_ptr& req)
@@ -56,22 +56,24 @@ void request_queue_impl_1q::add_request(request_ptr& req)
         STXXL_THROW_INVALID_ARGUMENT("Empty request submitted to disk_queue.");
     if (m_thread_state() != RUNNING)
         STXXL_THROW_INVALID_ARGUMENT("Request submitted to not running queue.");
+    if (!dynamic_cast<serving_request*>(req.get()))
+        STXXL_ERRMSG("Incompatible request submitted to running queue.");
 
 #if STXXL_CHECK_FOR_PENDING_REQUESTS_ON_SUBMISSION
     {
-        scoped_mutex_lock Lock(queue_mutex);
-        if (std::find_if(queue.begin(), queue.end(),
+        scoped_mutex_lock Lock(m_queue_mutex);
+        if (std::find_if(m_queue.begin(), m_queue.end(),
                          bind2nd(file_offset_match(), req) _STXXL_FORCE_SEQUENTIAL)
-            != queue.end())
+            != m_queue.end())
         {
             STXXL_ERRMSG("request submitted for a BID with a pending request");
         }
     }
 #endif
-    scoped_mutex_lock Lock(queue_mutex);
-    queue.push_back(req);
+    scoped_mutex_lock Lock(m_queue_mutex);
+    m_queue.push_back(req);
 
-    sem++;
+    m_sem++;
 }
 
 bool request_queue_impl_1q::cancel_request(request_ptr& req)
@@ -80,16 +82,21 @@ bool request_queue_impl_1q::cancel_request(request_ptr& req)
         STXXL_THROW_INVALID_ARGUMENT("Empty request canceled disk_queue.");
     if (m_thread_state() != RUNNING)
         STXXL_THROW_INVALID_ARGUMENT("Request canceled to not running queue.");
+    if (!dynamic_cast<serving_request*>(req.get()))
+        STXXL_ERRMSG("Incompatible request submitted to running queue.");
 
     bool was_still_in_queue = false;
     {
-        scoped_mutex_lock Lock(queue_mutex);
-        queue_type::iterator pos;
-        if ((pos = std::find(queue.begin(), queue.end(), req _STXXL_FORCE_SEQUENTIAL)) != queue.end())
+        scoped_mutex_lock Lock(m_queue_mutex);
+        queue_type::iterator pos
+            = std::find(m_queue.begin(), m_queue.end(),
+                        req _STXXL_FORCE_SEQUENTIAL);
+
+        if (pos != m_queue.end())
         {
-            queue.erase(pos);
+            m_queue.erase(pos);
             was_still_in_queue = true;
-            sem--;
+            m_sem--;
         }
     }
 
@@ -98,7 +105,7 @@ bool request_queue_impl_1q::cancel_request(request_ptr& req)
 
 request_queue_impl_1q::~request_queue_impl_1q()
 {
-    stop_thread(thread, m_thread_state, sem);
+    stop_thread(m_thread, m_thread_state, m_sem);
 }
 
 void* request_queue_impl_1q::worker(void* arg)
@@ -107,34 +114,34 @@ void* request_queue_impl_1q::worker(void* arg)
 
     for ( ; ; )
     {
-        pthis->sem--;
+        pthis->m_sem--;
 
         {
-            scoped_mutex_lock Lock(pthis->queue_mutex);
-            if (!pthis->queue.empty())
+            scoped_mutex_lock Lock(pthis->m_queue_mutex);
+            if (!pthis->m_queue.empty())
             {
-                request_ptr req = pthis->queue.front();
-                pthis->queue.pop_front();
+                request_ptr req = pthis->m_queue.front();
+                pthis->m_queue.pop_front();
 
                 Lock.unlock();
 
                 //assert(req->nref() > 1);
-                req->serve();
+                dynamic_cast<serving_request*>(req.get())->serve();
             }
             else
             {
                 Lock.unlock();
 
-                pthis->sem++;
+                pthis->m_sem++;
             }
         }
 
         // terminate if it has been requested and queues are empty
         if (pthis->m_thread_state() == TERMINATING) {
-            if ((pthis->sem--) == 0)
+            if ((pthis->m_sem--) == 0)
                 break;
             else
-                pthis->sem++;
+                pthis->m_sem++;
         }
     }
 
diff --git a/lib/io/request_queue_impl_qwqr.cpp b/lib/io/request_queue_impl_qwqr.cpp
index 8b54930..27e471d 100644
--- a/lib/io/request_queue_impl_qwqr.cpp
+++ b/lib/io/request_queue_impl_qwqr.cpp
@@ -17,7 +17,7 @@
 
 #include <stxxl/bits/common/error_handling.h>
 #include <stxxl/bits/io/request_queue_impl_qwqr.h>
-#include <stxxl/bits/io/request_with_state.h>
+#include <stxxl/bits/io/serving_request.h>
 #include <stxxl/bits/parallel.h>
 
 #if STXXL_STD_THREADS && STXXL_MSVC >= 1700
@@ -43,10 +43,10 @@ struct file_offset_match : public std::binary_function<request_ptr, request_ptr,
 };
 
 request_queue_impl_qwqr::request_queue_impl_qwqr(int n)
-    : m_thread_state(NOT_RUNNING), sem(0)
+    : m_thread_state(NOT_RUNNING), m_sem(0)
 {
     STXXL_UNUSED(n);
-    start_thread(worker, static_cast<void*>(this), thread, m_thread_state);
+    start_thread(worker, static_cast<void*>(this), m_thread, m_thread_state);
 }
 
 void request_queue_impl_qwqr::add_request(request_ptr& req)
@@ -55,41 +55,43 @@ void request_queue_impl_qwqr::add_request(request_ptr& req)
         STXXL_THROW_INVALID_ARGUMENT("Empty request submitted to disk_queue.");
     if (m_thread_state() != RUNNING)
         STXXL_THROW_INVALID_ARGUMENT("Request submitted to not running queue.");
+    if (!dynamic_cast<serving_request*>(req.get()))
+        STXXL_ERRMSG("Incompatible request submitted to running queue.");
 
     if (req.get()->get_type() == request::READ)
     {
 #if STXXL_CHECK_FOR_PENDING_REQUESTS_ON_SUBMISSION
         {
-            scoped_mutex_lock Lock(write_mutex);
-            if (std::find_if(write_queue.begin(), write_queue.end(),
+            scoped_mutex_lock Lock(m_write_mutex);
+            if (std::find_if(m_write_queue.begin(), m_write_queue.end(),
                              bind2nd(file_offset_match(), req) _STXXL_FORCE_SEQUENTIAL)
-                != write_queue.end())
+                != m_write_queue.end())
             {
                 STXXL_ERRMSG("READ request submitted for a BID with a pending WRITE request");
             }
         }
 #endif
-        scoped_mutex_lock Lock(read_mutex);
-        read_queue.push_back(req);
+        scoped_mutex_lock Lock(m_read_mutex);
+        m_read_queue.push_back(req);
     }
     else
     {
 #if STXXL_CHECK_FOR_PENDING_REQUESTS_ON_SUBMISSION
         {
-            scoped_mutex_lock Lock(read_mutex);
-            if (std::find_if(read_queue.begin(), read_queue.end(),
+            scoped_mutex_lock Lock(m_read_mutex);
+            if (std::find_if(m_read_queue.begin(), m_read_queue.end(),
                              bind2nd(file_offset_match(), req) _STXXL_FORCE_SEQUENTIAL)
-                != read_queue.end())
+                != m_read_queue.end())
             {
                 STXXL_ERRMSG("WRITE request submitted for a BID with a pending READ request");
             }
         }
 #endif
-        scoped_mutex_lock Lock(write_mutex);
-        write_queue.push_back(req);
+        scoped_mutex_lock Lock(m_write_mutex);
+        m_write_queue.push_back(req);
     }
 
-    sem++;
+    m_sem++;
 }
 
 bool request_queue_impl_qwqr::cancel_request(request_ptr& req)
@@ -98,28 +100,34 @@ bool request_queue_impl_qwqr::cancel_request(request_ptr& req)
         STXXL_THROW_INVALID_ARGUMENT("Empty request canceled disk_queue.");
     if (m_thread_state() != RUNNING)
         STXXL_THROW_INVALID_ARGUMENT("Request canceled to not running queue.");
+    if (!dynamic_cast<serving_request*>(req.get()))
+        STXXL_ERRMSG("Incompatible request submitted to running queue.");
 
     bool was_still_in_queue = false;
     if (req.get()->get_type() == request::READ)
     {
-        scoped_mutex_lock Lock(read_mutex);
-        queue_type::iterator pos;
-        if ((pos = std::find(read_queue.begin(), read_queue.end(), req _STXXL_FORCE_SEQUENTIAL)) != read_queue.end())
+        scoped_mutex_lock Lock(m_read_mutex);
+        queue_type::iterator pos
+            = std::find(m_read_queue.begin(), m_read_queue.end(),
+                        req _STXXL_FORCE_SEQUENTIAL);
+        if (pos != m_read_queue.end())
         {
-            read_queue.erase(pos);
+            m_read_queue.erase(pos);
             was_still_in_queue = true;
-            sem--;
+            m_sem--;
         }
     }
     else
     {
-        scoped_mutex_lock Lock(write_mutex);
-        queue_type::iterator pos;
-        if ((pos = std::find(write_queue.begin(), write_queue.end(), req _STXXL_FORCE_SEQUENTIAL)) != write_queue.end())
+        scoped_mutex_lock Lock(m_write_mutex);
+        queue_type::iterator pos
+            = std::find(m_write_queue.begin(), m_write_queue.end(),
+                        req _STXXL_FORCE_SEQUENTIAL);
+        if (pos != m_write_queue.end())
         {
-            write_queue.erase(pos);
+            m_write_queue.erase(pos);
             was_still_in_queue = true;
-            sem--;
+            m_sem--;
         }
     }
 
@@ -128,7 +136,7 @@ bool request_queue_impl_qwqr::cancel_request(request_ptr& req)
 
 request_queue_impl_qwqr::~request_queue_impl_qwqr()
 {
-    stop_thread(thread, m_thread_state, sem);
+    stop_thread(m_thread, m_thread_state, m_sem);
 }
 
 void* request_queue_impl_qwqr::worker(void* arg)
@@ -138,72 +146,70 @@ void* request_queue_impl_qwqr::worker(void* arg)
     bool write_phase = true;
     for ( ; ; )
     {
-        pthis->sem--;
+        pthis->m_sem--;
 
         if (write_phase)
         {
-            scoped_mutex_lock WriteLock(pthis->write_mutex);
-            if (!pthis->write_queue.empty())
+            scoped_mutex_lock WriteLock(pthis->m_write_mutex);
+            if (!pthis->m_write_queue.empty())
             {
-                request_ptr req = pthis->write_queue.front();
-                pthis->write_queue.pop_front();
+                request_ptr req = pthis->m_write_queue.front();
+                pthis->m_write_queue.pop_front();
 
                 WriteLock.unlock();
 
-                //assert(req->nref() > 1);
-                req->serve();
+                //assert(req->get_reference_count()) > 1);
+                dynamic_cast<serving_request*>(req.get())->serve();
             }
             else
             {
                 WriteLock.unlock();
 
-                pthis->sem++;
+                pthis->m_sem++;
 
-                if (pthis->_priority_op == WRITE)
+                if (pthis->m_priority_op == WRITE)
                     write_phase = false;
             }
 
-            if (pthis->_priority_op == NONE
-                || pthis->_priority_op == READ)
+            if (pthis->m_priority_op == NONE || pthis->m_priority_op == READ)
                 write_phase = false;
         }
         else
         {
-            scoped_mutex_lock ReadLock(pthis->read_mutex);
+            scoped_mutex_lock ReadLock(pthis->m_read_mutex);
 
-            if (!pthis->read_queue.empty())
+            if (!pthis->m_read_queue.empty())
             {
-                request_ptr req = pthis->read_queue.front();
-                pthis->read_queue.pop_front();
+                request_ptr req = pthis->m_read_queue.front();
+                pthis->m_read_queue.pop_front();
 
                 ReadLock.unlock();
 
-                STXXL_VERBOSE2("queue: before serve request has " << req->nref() << " references ");
-                //assert(req->nref() > 1);
-                req->serve();
-                STXXL_VERBOSE2("queue: after serve request has " << req->nref() << " references ");
+                STXXL_VERBOSE2("queue: before serve request has " << req->get_reference_count() << " references ");
+                //assert(req->get_reference_count() > 1);
+                dynamic_cast<serving_request*>(req.get())->serve();
+                STXXL_VERBOSE2("queue: after serve request has " << req->get_reference_count() << " references ");
             }
             else
             {
                 ReadLock.unlock();
 
-                pthis->sem++;
+                pthis->m_sem++;
 
-                if (pthis->_priority_op == READ)
+                if (pthis->m_priority_op == READ)
                     write_phase = true;
             }
 
-            if (pthis->_priority_op == NONE
-                || pthis->_priority_op == WRITE)
+            if (pthis->m_priority_op == NONE || pthis->m_priority_op == WRITE)
                 write_phase = true;
         }
 
         // terminate if it has been requested and queues are empty
         if (pthis->m_thread_state() == TERMINATING) {
-            if ((pthis->sem--) == 0)
+            if ((pthis->m_sem--) == 0)
                 break;
             else
-                pthis->sem++;
+                pthis->m_sem++;
         }
     }
 
diff --git a/lib/io/request_queue_impl_worker.cpp b/lib/io/request_queue_impl_worker.cpp
index de5c1df..0ae2814 100644
--- a/lib/io/request_queue_impl_worker.cpp
+++ b/lib/io/request_queue_impl_worker.cpp
@@ -30,7 +30,6 @@
  #include <windows.h>
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
 void request_queue_impl_worker::start_thread(void* (* worker)(void*), void* arg, thread_type& t, state<thread_state>& s)
diff --git a/lib/io/request_with_state.cpp b/lib/io/request_with_state.cpp
index ea2b94f..4560286 100644
--- a/lib/io/request_with_state.cpp
+++ b/lib/io/request_with_state.cpp
@@ -23,47 +23,46 @@
 
 #include <cassert>
 
-
 STXXL_BEGIN_NAMESPACE
 
 request_with_state::~request_with_state()
 {
-    STXXL_VERBOSE3("[" << static_cast<void*>(this) << "] request_with_state::~(), ref_cnt: " << ref_cnt);
+    STXXL_VERBOSE3_THIS("request_with_state::~(), ref_cnt: " << get_reference_count());
 
-    assert(_state() == DONE || _state() == READY2DIE);
+    assert(m_state() == DONE || m_state() == READY2DIE);
 
-    // if(_state() != DONE && _state()!= READY2DIE )
+    // if(m_state() != DONE && m_state()!= READY2DIE )
     // STXXL_ERRMSG("WARNING: serious stxxl inconsistency: Request is being deleted while I/O not finished. "<<
     //              "Please submit a bug report.");
 
-    // _state.wait_for (READY2DIE); // does not make sense ?
+    // m_state.wait_for (READY2DIE); // does not make sense ?
 }
 
 void request_with_state::wait(bool measure_time)
 {
-    STXXL_VERBOSE3("[" << static_cast<void*>(this) << "] request_with_state::wait()");
+    STXXL_VERBOSE3_THIS("request_with_state::wait()");
 
-    stats::scoped_wait_timer wait_timer(get_type() == READ ? stats::WAIT_OP_READ : stats::WAIT_OP_WRITE, measure_time);
+    stats::scoped_wait_timer wait_timer(m_type == READ ? stats::WAIT_OP_READ : stats::WAIT_OP_WRITE, measure_time);
 
-    _state.wait_for(READY2DIE);
+    m_state.wait_for(READY2DIE);
 
     check_errors();
 }
 
 bool request_with_state::cancel()
 {
-    STXXL_VERBOSE3("[" << static_cast<void*>(this) << "] request_with_state::cancel() " << file_ << " " << buffer << " " << offset);
+    STXXL_VERBOSE3_THIS("request_with_state::cancel() " << m_file << " " << m_buffer << " " << m_offset);
 
-    if (file_)
+    if (m_file)
     {
         request_ptr rp(this);
-        if (disk_queues::get_instance()->cancel_request(rp, file_->get_queue_id()))
+        if (disk_queues::get_instance()->cancel_request(rp, m_file->get_queue_id()))
         {
-            _state.set_to(DONE);
+            m_state.set_to(DONE);
             notify_waiters();
-            file_->delete_request_ref();
-            file_ = 0;
-            _state.set_to(READY2DIE);
+            m_file->delete_request_ref();
+            m_file = 0;
+            m_state.set_to(READY2DIE);
             return true;
         }
     }
@@ -72,12 +71,25 @@ bool request_with_state::cancel()
 
 bool request_with_state::poll()
 {
-    const request_state s = _state();
+    const request_state s = m_state();
 
     check_errors();
 
     return s == DONE || s == READY2DIE;
 }
 
+void request_with_state::completed(bool canceled)
+{
+    STXXL_VERBOSE3_THIS("request_with_state::completed()");
+    m_state.set_to(DONE);
+    if (!canceled)
+        m_on_complete(this);
+    notify_waiters();
+    m_file->delete_request_ref();
+    m_file = 0;
+    m_state.set_to(READY2DIE);
+}
+
 STXXL_END_NAMESPACE
+
 // vim: et:ts=4:sw=4
diff --git a/lib/io/request_with_waiters.cpp b/lib/io/request_with_waiters.cpp
index aac6e89..962d3e4 100644
--- a/lib/io/request_with_waiters.cpp
+++ b/lib/io/request_with_waiters.cpp
@@ -19,7 +19,6 @@
 #include <algorithm>
 #include <functional>
 
-
 STXXL_BEGIN_NAMESPACE
 
 bool request_with_waiters::add_waiter(onoff_switch* sw)
@@ -28,40 +27,38 @@ bool request_with_waiters::add_waiter(onoff_switch* sw)
     // condition might occur: the state might change and notify_waiters()
     // could be called between poll() and insert() resulting in waiter sw
     // never being notified
-    scoped_mutex_lock lock(waiters_mutex);
+    scoped_mutex_lock lock(m_waiters_mutex);
 
     if (poll())                     // request already finished
     {
         return true;
     }
 
-    waiters.insert(sw);
+    m_waiters.insert(sw);
 
     return false;
 }
 
 void request_with_waiters::delete_waiter(onoff_switch* sw)
 {
-    scoped_mutex_lock lock(waiters_mutex);
-    waiters.erase(sw);
+    scoped_mutex_lock lock(m_waiters_mutex);
+    m_waiters.erase(sw);
 }
 
 void request_with_waiters::notify_waiters()
 {
-    scoped_mutex_lock lock(waiters_mutex);
-    std::for_each(waiters.begin(),
-                  waiters.end(),
+    scoped_mutex_lock lock(m_waiters_mutex);
+    std::for_each(m_waiters.begin(),
+                  m_waiters.end(),
                   std::mem_fun(&onoff_switch::on)
                   _STXXL_FORCE_SEQUENTIAL);
 }
 
-/*
-int request_with_waiters::nwaiters()
+size_t request_with_waiters::num_waiters()
 {
-    scoped_mutex_lock lock(waiters_mutex);
-    return waiters.size();
+    scoped_mutex_lock lock(m_waiters_mutex);
+    return m_waiters.size();
 }
-*/
 
 STXXL_END_NAMESPACE
 // vim: et:ts=4:sw=4
diff --git a/lib/io/serving_request.cpp b/lib/io/serving_request.cpp
index d3de834..cfb1a67 100644
--- a/lib/io/serving_request.cpp
+++ b/lib/io/serving_request.cpp
@@ -22,7 +22,6 @@
 
 #include <iomanip>
 
-
 STXXL_BEGIN_NAMESPACE
 
 serving_request::serving_request(
@@ -31,8 +30,8 @@ serving_request::serving_request(
     void* buf,
     offset_type off,
     size_type b,
-    request_type t) :
-    request_with_state(on_cmpl, f, buf, off, b, t)
+    request_type t)
+    : request_with_state(on_cmpl, f, buf, off, b, t)
 {
 #ifdef STXXL_CHECK_BLOCK_ALIGNING
     // Direct I/O requires file system block size alignment for file offsets,
@@ -45,17 +44,17 @@ serving_request::serving_request(
 void serving_request::serve()
 {
     check_nref();
-    STXXL_VERBOSE2(
-        "[" << static_cast<void*>(this) << "] serving_request::serve(): " <<
-        buffer << " @ [" <<
-        file_ << "|" << file_->get_allocator_id() << "]0x" <<
+    STXXL_VERBOSE2_THIS(
+        "serving_request::serve(): " <<
+        m_buffer << " @ [" <<
+        m_file << "|" << m_file->get_allocator_id() << "]0x" <<
         std::hex << std::setfill('0') << std::setw(8) <<
-        offset << "/0x" << bytes <<
-        ((type == request::READ) ? " READ" : " WRITE"));
+        m_offset << "/0x" << m_bytes <<
+        ((m_type == request::READ) ? " READ" : " WRITE"));
 
     try
     {
-        file_->serve(this);
+        m_file->serve(m_buffer, m_offset, m_bytes, m_type);
     }
     catch (const io_error& ex)
     {
@@ -64,20 +63,12 @@ void serving_request::serve()
 
     check_nref(true);
 
-    completed();
-}
-
-void serving_request::completed()
-{
-    STXXL_VERBOSE2("[" << static_cast<void*>(this) << "] serving_request::completed()");
-    _state.set_to(DONE);
-    request_with_state::completed();
-    _state.set_to(READY2DIE);
+    completed(false);
 }
 
 const char* serving_request::io_type() const
 {
-    return file_->io_type();
+    return m_file->io_type();
 }
 
 STXXL_END_NAMESPACE
diff --git a/lib/io/simdisk_file.cpp b/lib/io/simdisk_file.cpp
index 8114e00..a39f58b 100644
--- a/lib/io/simdisk_file.cpp
+++ b/lib/io/simdisk_file.cpp
@@ -22,6 +22,7 @@
 
 STXXL_BEGIN_NAMESPACE
 
+const double simdisk_geometry::s_average_speed = (15 * 1024 * 1024);
 
 void simdisk_geometry::add_zone(int& first_cyl, int last_cyl,
                                 int sec_per_track, int& first_sect)
@@ -52,7 +53,7 @@ double simdisk_geometry::get_delay(file::offset_type offset, file::size_type siz
         double(interface_speed);
 
     std::set<Zone, ZoneCmp>::iterator zone = zones.lower_bound(first_sect);
-    //std::cout << __PRETTY_FUNCTION__ << " " << (*zone).first_sector << std::endl;
+    //std::cout << __FUNCTION__ << " " << (*zone).first_sector << std::endl;
     while (1)
     {
         int from_this_zone =
@@ -78,11 +79,10 @@ double simdisk_geometry::get_delay(file::offset_type offset, file::size_type siz
     return delay;
 #else
     STXXL_UNUSED(offset);
-    return double(size) / double(AVERAGE_SPEED);
+    return double(size) / s_average_speed;
 #endif
 }
 
-
 IC35L080AVVA07::IC35L080AVVA07()
 {
     std::cout << "Creating IBM 120GXP IC35L080AVVA07" <<
@@ -156,14 +156,11 @@ IC35L080AVVA07::IC35L080AVVA07()
 
 ////////////////////////////////////////////////////////////////////////////
 
-void sim_disk_file::serve(const request* req) throw (io_error)
+void sim_disk_file::serve(void* buffer, offset_type offset, size_type bytes,
+                          request::request_type type)
 {
     scoped_mutex_lock fd_lock(fd_mutex);
-    assert(req->get_file() == this);
-    offset_type offset = req->get_offset();
-    void* buffer = req->get_buffer();
-    size_type bytes = req->get_size();
-    request::request_type type = req->get_type();
+
     double op_start = timestamp();
 
     stats::scoped_read_write_timer read_write_timer(bytes, type == request::WRITE);
@@ -205,7 +202,7 @@ void sim_disk_file::serve(const request* req) throw (io_error)
     if (seconds_to_wait)
         sleep(seconds_to_wait);
 
-    usleep((unsigned long)((delay - seconds_to_wait) * 1000000.));
+    usleep((useconds_t)((delay - seconds_to_wait) * 1000000.));
 }
 
 const char* sim_disk_file::io_type() const
diff --git a/lib/io/syscall_file.cpp b/lib/io/syscall_file.cpp
index e91a101..1f84a77 100644
--- a/lib/io/syscall_file.cpp
+++ b/lib/io/syscall_file.cpp
@@ -21,17 +21,14 @@
 #include <stxxl/bits/io/syscall_file.h>
 #include "ufs_platform.h"
 
-
 STXXL_BEGIN_NAMESPACE
 
-void syscall_file::serve(const request* req) throw (io_error)
+void syscall_file::serve(void* buffer, offset_type offset, size_type bytes,
+                         request::request_type type)
 {
     scoped_mutex_lock fd_lock(fd_mutex);
-    assert(req->get_file() == this);
-    offset_type offset = req->get_offset();
-    char* buffer = static_cast<char*>(req->get_buffer());
-    size_type bytes = req->get_size();
-    request::request_type type = req->get_type();
+
+    char* cbuffer = static_cast<char*>(buffer);
 
     stats::scoped_read_write_timer read_write_timer(bytes, type == request::WRITE);
 
@@ -47,7 +44,7 @@ void syscall_file::serve(const request* req) throw (io_error)
                 " path=" << filename <<
                 " fd=" << file_des <<
                 " offset=" << offset <<
-                " buffer=" << (void*)buffer <<
+                " buffer=" << cbuffer <<
                 " bytes=" << bytes <<
                 " type=" << ((type == request::READ) ? "READ" : "WRITE") <<
                 " rc=" << rc);
@@ -57,9 +54,9 @@ void syscall_file::serve(const request* req) throw (io_error)
         {
 #if STXXL_MSVC
             assert(bytes <= std::numeric_limits<unsigned int>::max());
-            if ((rc = ::read(file_des, buffer, (unsigned int)bytes)) <= 0)
+            if ((rc = ::read(file_des, cbuffer, (unsigned int)bytes)) <= 0)
 #else
-            if ((rc = ::read(file_des, buffer, bytes)) <= 0)
+            if ((rc = ::read(file_des, cbuffer, bytes)) <= 0)
 #endif
             {
                 STXXL_THROW_ERRNO
@@ -69,20 +66,20 @@ void syscall_file::serve(const request* req) throw (io_error)
                     " path=" << filename <<
                     " fd=" << file_des <<
                     " offset=" << offset <<
-                    " buffer=" << (void*)buffer <<
+                    " buffer=" << buffer <<
                     " bytes=" << bytes <<
                     " type=" << "READ" <<
                     " rc=" << rc);
             }
-            bytes -= rc;
+            bytes = (size_type)(bytes - rc);
             offset += rc;
-            buffer += rc;
+            cbuffer += rc;
 
             if (bytes > 0 && offset == this->_size())
             {
                 // read request extends past end-of-file
                 // fill reminder with zeroes
-                memset(buffer, 0, bytes);
+                memset(cbuffer, 0, bytes);
                 bytes = 0;
             }
         }
@@ -90,9 +87,9 @@ void syscall_file::serve(const request* req) throw (io_error)
         {
 #if STXXL_MSVC
             assert(bytes <= std::numeric_limits<unsigned int>::max());
-            if ((rc = ::write(file_des, buffer, (unsigned int)bytes)) <= 0)
+            if ((rc = ::write(file_des, cbuffer, (unsigned int)bytes)) <= 0)
 #else
-            if ((rc = ::write(file_des, buffer, bytes)) <= 0)
+            if ((rc = ::write(file_des, cbuffer, bytes)) <= 0)
 #endif
             {
                 STXXL_THROW_ERRNO
@@ -102,14 +99,14 @@ void syscall_file::serve(const request* req) throw (io_error)
                     " path=" << filename <<
                     " fd=" << file_des <<
                     " offset=" << offset <<
-                    " buffer=" << (void*)buffer <<
+                    " buffer=" << buffer <<
                     " bytes=" << bytes <<
                     " type=" << "WRITE" <<
                     " rc=" << rc);
             }
-            bytes -= rc;
+            bytes = (size_type)(bytes - rc);
             offset += rc;
-            buffer += rc;
+            cbuffer += rc;
         }
     }
 }
diff --git a/lib/io/ufs_file_base.cpp b/lib/io/ufs_file_base.cpp
index 539f9a2..e7993f6 100644
--- a/lib/io/ufs_file_base.cpp
+++ b/lib/io/ufs_file_base.cpp
@@ -31,7 +31,8 @@ const char* ufs_file_base::io_type() const
 
 ufs_file_base::ufs_file_base(
     const std::string& filename,
-    int mode) : file_des(-1), m_mode(mode), filename(filename)
+    int mode)
+    : file_des(-1), m_mode(mode), filename(filename)
 {
     int flags = 0;
 
@@ -183,7 +184,7 @@ void ufs_file_base::lock()
 #else
     scoped_mutex_lock fd_lock(fd_mutex);
     struct flock lock_struct;
-    lock_struct.l_type = (m_mode & RDONLY) ? F_RDLCK : F_RDLCK | F_WRLCK;
+    lock_struct.l_type = (short)(m_mode & RDONLY ? F_RDLCK : F_RDLCK | F_WRLCK);
     lock_struct.l_whence = SEEK_SET;
     lock_struct.l_start = 0;
     lock_struct.l_len = 0; // lock all bytes
diff --git a/lib/io/wbtl_file.cpp b/lib/io/wbtl_file.cpp
index 7952a11..0fee4a1 100644
--- a/lib/io/wbtl_file.cpp
+++ b/lib/io/wbtl_file.cpp
@@ -28,42 +28,36 @@
 #define STXXL_VERBOSE_WBTL STXXL_VERBOSE2
 #endif
 
-
 STXXL_BEGIN_NAMESPACE
 
-
 wbtl_file::wbtl_file(
     file* backend_file,
     size_type write_buffer_size,
     int write_buffers,
-    int queue_id, int allocator_id) :
-    disk_queued_file(queue_id, allocator_id), storage(backend_file), sz(0), write_block_size(write_buffer_size),
-    free_bytes(0), curbuf(1), curpos(write_block_size)
+    int queue_id, int allocator_id)
+    : disk_queued_file(queue_id, allocator_id), storage(backend_file),
+      sz(0), write_block_size(write_buffer_size),
+      free_bytes(0), curbuf(1), curpos(write_block_size)
 {
     STXXL_UNUSED(write_buffers);
     assert(write_buffers == 2); // currently hardcoded
-    write_buffer[0] = static_cast<char*>(stxxl::aligned_alloc<BLOCK_ALIGN>(write_block_size));
-    write_buffer[1] = static_cast<char*>(stxxl::aligned_alloc<BLOCK_ALIGN>(write_block_size));
+    write_buffer[0] = static_cast<char*>(stxxl::aligned_alloc<STXXL_BLOCK_ALIGN>(write_block_size));
+    write_buffer[1] = static_cast<char*>(stxxl::aligned_alloc<STXXL_BLOCK_ALIGN>(write_block_size));
     buffer_address[0] = offset_type(-1);
     buffer_address[1] = offset_type(-1);
 }
 
 wbtl_file::~wbtl_file()
 {
-    stxxl::aligned_dealloc<BLOCK_ALIGN>(write_buffer[1]);
-    stxxl::aligned_dealloc<BLOCK_ALIGN>(write_buffer[0]);
+    stxxl::aligned_dealloc<STXXL_BLOCK_ALIGN>(write_buffer[1]);
+    stxxl::aligned_dealloc<STXXL_BLOCK_ALIGN>(write_buffer[0]);
     delete storage;
     storage = 0;
 }
 
-void wbtl_file::serve(const request* req) throw (io_error)
+void wbtl_file::serve(void* buffer, offset_type offset, size_type bytes,
+                      request::request_type type)
 {
-    assert(req->get_file() == this);
-    offset_type offset = req->get_offset();
-    void* buffer = req->get_buffer();
-    size_type bytes = req->get_size();
-    request::request_type type = req->get_type();
-
     if (type == request::READ)
     {
         //stats::scoped_read_timer read_timer(size());
@@ -243,7 +237,7 @@ void wbtl_file::sread(void* buffer, offset_type offset, size_type bytes)
     else
     {
         // block is not cached
-        request_ptr req = storage->aread(buffer, physical_offset, bytes, default_completion_handler());
+        request_ptr req = storage->aread(buffer, physical_offset, bytes);
         req->wait(false);
     }
     STXXL_VERBOSE_WBTL("wbtl:sread   l" << FMT_A_S(offset, bytes) << " @    p" << FMT_A(physical_offset) << " " << std::dec << cached);
@@ -281,7 +275,7 @@ void wbtl_file::swrite(void* buffer, offset_type offset, size_type bytes)
                 backend_request->wait(false);
             }
 
-            backend_request = storage->awrite(write_buffer[curbuf], buffer_address[curbuf], write_block_size, default_completion_handler());
+            backend_request = storage->awrite(write_buffer[curbuf], buffer_address[curbuf], write_block_size);
         }
 
         curbuf = 1 - curbuf;
diff --git a/lib/io/wfs_file_base.cpp b/lib/io/wfs_file_base.cpp
index bf8f2fd..e2a0600 100644
--- a/lib/io/wfs_file_base.cpp
+++ b/lib/io/wfs_file_base.cpp
@@ -24,7 +24,6 @@
 
 STXXL_BEGIN_NAMESPACE
 
-
 const char* wfs_file_base::io_type() const
 {
     return "wfs_base";
@@ -75,8 +74,7 @@ static HANDLE open_file_impl(const std::string& filename, int mode)
 #else
         if (mode & file::REQUIRE_DIRECT) {
             STXXL_ERRMSG("Error: open()ing " << filename << " with DIRECT mode required, but the system does not support it.");
-            file_des = INVALID_HANDLE_VALUE;
-            return;
+            return INVALID_HANDLE_VALUE;
         }
         else {
             STXXL_MSG("Warning: open()ing " << filename << " without DIRECT mode, as the system does not support it.");
diff --git a/lib/io/wincall_file.cpp b/lib/io/wincall_file.cpp
index 2e885d5..7fd9613 100644
--- a/lib/io/wincall_file.cpp
+++ b/lib/io/wincall_file.cpp
@@ -25,15 +25,10 @@
 
 STXXL_BEGIN_NAMESPACE
 
-
-void wincall_file::serve(const request* req) throw (io_error)
+void wincall_file::serve(void* buffer, offset_type offset, size_type bytes,
+                         request::request_type type)
 {
     scoped_mutex_lock fd_lock(fd_mutex);
-    assert(req->get_file() == this);
-    offset_type offset = req->get_offset();
-    void* buffer = req->get_buffer();
-    size_type bytes = req->get_size();
-    request::request_type type = req->get_type();
 
     if (bytes > 32 * 1024 * 1024) {
         STXXL_ERRMSG("Using a block size larger than 32 MiB may not work with the " << io_type() << " filetype");
diff --git a/lib/mng/config.cpp b/lib/mng/config.cpp
index 4b9e5a5..daaedb9 100644
--- a/lib/mng/config.cpp
+++ b/lib/mng/config.cpp
@@ -60,6 +60,8 @@ void config::initialize()
         find_config();
     }
 
+    m_max_device_id = 0;
+
     is_initialized = true;
 }
 
@@ -172,6 +174,25 @@ void config::load_config_file(const std::string& config_path)
     }
 }
 
+//! Returns the current value of the automatic physical device id counter
+unsigned int config::get_max_device_id()
+{
+    return m_max_device_id;
+}
+
+//! Returns the current automatic physical device id, then increments the counter
+unsigned int config::get_next_device_id()
+{
+    return m_max_device_id++;
+}
+
+//! Raises the automatic physical device id counter to devid + 1 if it is lower
+void config::update_max_device_id(unsigned int devid)
+{
+    if (m_max_device_id < devid + 1)
+        m_max_device_id = devid + 1;
+}
+
 uint64 config::total_size() const
 {
     assert(is_initialized);
@@ -187,6 +208,8 @@ uint64 config::total_size() const
     return total_size;
 }
 
+////////////////////////////////////////////////////////////////////////////////
+
 disk_config::disk_config()
     : size(0),
       autogrow(false),
@@ -194,8 +217,10 @@ disk_config::disk_config()
       direct(DIRECT_TRY),
       flash(false),
       queue(file::DEFAULT_QUEUE),
+      device_id(file::DEFAULT_DEVICE_ID),
       raw_device(false),
-      unlink_on_open(false)
+      unlink_on_open(false),
+      queue_length(0)
 { }
 
 disk_config::disk_config(const std::string& _path, uint64 _size,
@@ -208,8 +233,10 @@ disk_config::disk_config(const std::string& _path, uint64 _size,
       direct(DIRECT_TRY),
       flash(false),
       queue(file::DEFAULT_QUEUE),
+      device_id(file::DEFAULT_DEVICE_ID),
       raw_device(false),
-      unlink_on_open(false)
+      unlink_on_open(false),
+      queue_length(0)
 {
     parse_fileio();
 }
@@ -221,8 +248,10 @@ disk_config::disk_config(const std::string& line)
       direct(DIRECT_TRY),
       flash(false),
       queue(file::DEFAULT_QUEUE),
+      device_id(file::DEFAULT_DEVICE_ID),
       raw_device(false),
-      unlink_on_open(false)
+      unlink_on_open(false),
+      queue_length(0)
 {
     parse_line(line);
 }
@@ -250,6 +279,7 @@ void disk_config::parse_line(const std::string& line)
     direct = DIRECT_TRY;
     // flash is already set
     queue = file::DEFAULT_QUEUE;
+    device_id = file::DEFAULT_DEVICE_ID;
     unlink_on_open = false;
 
     // *** Save Basic Options ***
@@ -347,8 +377,21 @@ void disk_config::parse_fileio()
         }
         else if (eq[0] == "queue")
         {
+            if (io_impl == "linuxaio") {
+                STXXL_THROW(std::runtime_error, "Parameter '" << *p << "' invalid for fileio '" << io_impl << "' in disk configuration file.");
+            }
+
+            char* endp;
+            queue = (int)strtoul(eq[1].c_str(), &endp, 10);
+            if (endp && *endp != 0) {
+                STXXL_THROW(std::runtime_error,
+                            "Invalid parameter '" << *p << "' in disk configuration file.");
+            }
+        }
+        else if (eq[0] == "device_id" || eq[0] == "devid")
+        {
             char* endp;
-            queue = strtoul(eq[1].c_str(), &endp, 10);
+            device_id = (int)strtoul(eq[1].c_str(), &endp, 10);
             if (endp && *endp != 0) {
                 STXXL_THROW(std::runtime_error,
                             "Invalid parameter '" << *p << "' in disk configuration file.");
@@ -364,7 +407,9 @@ void disk_config::parse_fileio()
         }
         else if (*p == "unlink" || *p == "unlink_on_open")
         {
-            if (!(io_impl == "syscall" || io_impl == "mmap") || io_impl == "wbtl") {
+            if (!(io_impl == "syscall" || io_impl == "linuxaio" ||
+                  io_impl == "mmap" || io_impl == "wbtl"))
+            {
                 STXXL_THROW(std::runtime_error, "Parameter '" << *p << "' invalid for fileio '" << io_impl << "' in disk configuration file.");
             }
 
@@ -398,20 +443,26 @@ std::string disk_config::fileio_string() const
     else if (direct == DIRECT_ON)
         oss << " direct=on";
     else
-        assert(!"Invalid setting for 'direct' option.");
+        STXXL_THROW(std::runtime_error, "Invalid setting for 'direct' option.");
 
     if (flash)
         oss << " flash";
 
-    if (queue != file::DEFAULT_QUEUE)
+    if (queue != file::DEFAULT_QUEUE && queue != file::DEFAULT_LINUXAIO_QUEUE)
         oss << " queue=" << queue;
 
+    if (device_id != file::DEFAULT_DEVICE_ID)
+        oss << " devid=" << device_id;
+
     if (raw_device)
         oss << " raw_device";
 
     if (unlink_on_open)
         oss << " unlink_on_open";
 
+    if (queue_length != 0)
+        oss << " queue_length=" << queue_length;
+
     return oss.str();
 }
 
diff --git a/lib/mng/disk_allocator.cpp b/lib/mng/disk_allocator.cpp
index d8cce90..2e8f11d 100644
--- a/lib/mng/disk_allocator.cpp
+++ b/lib/mng/disk_allocator.cpp
@@ -23,7 +23,6 @@
 #include <ostream>
 #include <utility>
 
-
 STXXL_BEGIN_NAMESPACE
 
 void disk_allocator::dump() const
@@ -39,7 +38,6 @@ void disk_allocator::dump() const
     STXXL_ERRMSG("Total bytes: " << total);
 }
 
-
 void disk_allocator::deallocation_error(
     stxxl::int64 block_pos, stxxl::int64 block_size,
     const sortseq::iterator& pred, const sortseq::iterator& succ) const
@@ -63,7 +61,6 @@ void disk_allocator::deallocation_error(
     dump();
 }
 
-
 void disk_allocator::add_free_region(stxxl::int64 block_pos, stxxl::int64 block_size)
 {
     //assert(block_size);
diff --git a/local/test1.cpp b/local/test1.cpp
index b1ec733..eaba556 100644
--- a/local/test1.cpp
+++ b/local/test1.cpp
@@ -33,14 +33,24 @@ int main()
     stxxl::VECTOR_GENERATOR<int>::result vector;
 
     // fill vector with random integers
-    stxxl::random_number32 random;
+    {
+        stxxl::scoped_print_timer
+            timer("write random numbers", 100 * 1024 * 1024 * sizeof(int));
 
-    for (size_t i = 0; i < 100 * 1024 * 1024; ++i) {
-        vector.push_back(random());
+        stxxl::random_number32 random;
+
+        for (size_t i = 0; i < 100 * 1024 * 1024; ++i) {
+            vector.push_back(random());
+        }
     }
 
     // sort vector using 16 MiB RAM
-    stxxl::sort(vector.begin(), vector.end(), my_less_int(), 16 * 1024 * 1024);
+    {
+        stxxl::scoped_print_timer
+            timer("sorting random numbers", 100 * 1024 * 1024 * sizeof(int));
+
+        stxxl::sort(vector.begin(), vector.end(), my_less_int(), 16 * 1024 * 1024);
+    }
 
     // output first and last items:
     std::cout << vector.size() << " items sorted ranging from "
diff --git a/local/test2.cpp b/local/test2.cpp
new file mode 100644
index 0000000..34ceddb
--- /dev/null
+++ b/local/test2.cpp
@@ -0,0 +1,68 @@
+/***************************************************************************
+ *  local/test2.cpp
+ *
+ *  This is another example file included in the local/ directory of STXXL. All
+ *  .cpp files in local/ are automatically compiled and linked with STXXL by
+ *  CMake.  You can use this method for simple prototype applications.
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <iostream>
+#include <limits>
+
+#include <stxxl/random>
+#include <stxxl/sorter>
+
+struct my_less : std::less<int64_t>
+{
+    int64_t min_value() const { return std::numeric_limits<int64_t>::min(); }
+    int64_t max_value() const { return std::numeric_limits<int64_t>::max(); }
+};
+
+int main()
+{
+    stxxl::scoped_print_timer
+        timer("overall work", 600 * 1024 * 1024 * (int64_t)sizeof(int64_t));
+
+    // create sorter
+    stxxl::sorter<int64_t, my_less> sorter(my_less(), 256 * 1024 * 1024);
+
+    // fill sorter with random integers
+    {
+        stxxl::scoped_print_timer
+            timer("presort+write random numbers", 600 * 1024 * 1024 * (int64_t)sizeof(int64_t));
+
+        stxxl::random_number32 random;
+
+        for (size_t i = 0; i < 600 * 1024 * 1024; ++i) {
+            sorter.push(random() * random());
+        }
+    }
+
+    sorter.sort();
+
+    // get data back in sorted order
+    {
+        stxxl::scoped_print_timer
+            timer("read+merge random numbers", 600 * 1024 * 1024 * (int64_t)sizeof(int64_t));
+
+        int64_t first = *sorter, last = first, count = 1;
+        ++sorter;
+
+        while (!sorter.empty())
+            last = *sorter, ++sorter, ++count;
+
+        // output first and last items:
+        std::cout << count << " items sorted ranging from "
+                  << first << " to " << last << std::endl;
+    }
+
+    return 0;
+}
diff --git a/misc/analyze-source.pl b/misc/analyze-source.pl
index ad86316..b7ba4c5 100755
--- a/misc/analyze-source.pl
+++ b/misc/analyze-source.pl
@@ -7,7 +7,7 @@
 #
 #  Part of the STXXL. See http://stxxl.sourceforge.net
 #
-#  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+#  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
 #
 #  Distributed under the Boost Software License, Version 1.0.
 #  (See accompanying file LICENSE_1_0.txt or copy at
@@ -70,6 +70,22 @@ sub expect_re($$\$$) {
     }
 }
 
+# check equality of two arrays (given as references), comparing elements as strings
+sub array_equal {
+    my ($a1ref,$a2ref) = @_;
+
+    my @a1 = @{$a1ref};
+    my @a2 = @{$a2ref};
+
+    return 0 if scalar(@a1) != scalar(@a2);
+
+    for my $i (0..scalar(@a1)-1) {
+        return 0 if $a1[$i] ne $a2[$i];
+    }
+
+    return 1;
+}
+
 # run $text through a external pipe (@program)
 sub filter_program {
     my $text = shift;
@@ -115,6 +131,8 @@ sub process_cpp {
     my @data = <F>;
     close(F);
 
+    my @origdata = @data;
+
     # put all #include lines into the includemap
     foreach my $ln (@data)
     {
@@ -123,6 +141,18 @@ sub process_cpp {
         }
     }
 
+    # check #include "stxxl..." use
+    {
+        foreach my $ln (@data)
+        {
+            if ($ln =~ m@\s*#\s*include\s*"stxxl\S+"@) {
+                print("#include \"stxxl...\" found in $path\n");
+                print $ln."\n";
+                system("emacsclient -n $path") if $launch_emacs;
+            }
+        }
+    }
+
     # check for assert() in test cases
     if ($path =~ /^test/)
     {
@@ -134,6 +164,32 @@ sub process_cpp {
         }
     }
 
+    # check for \brief doxygen commands
+    {
+        foreach my $ln (@data)
+        {
+            if ($ln =~ m!\\brief!) {
+                print("found brief command in $path\n");
+                system("emacsclient -n $path") if $launch_emacs;
+            }
+        }
+    }
+
+    # check for double underscores at the start of a token (a list of names is exempt)
+    {
+        foreach my $ln (@data)
+        {
+            next if $ln =~ /^\s*#(if|elif|define|error)/;
+            next if $path eq "include/stxxl/bits/common/types.h";
+
+            if ($ln =~ m@\s__(?!(gnu_parallel|gnu_cxx|glibcxx|typeof__|attribute__|sync_add_and_fetch|FILE__|LINE__|FUNCTION__))@) {
+                print("double-underscore found in $path\n");
+                print $ln."\n";
+                system("emacsclient -n $path") if $launch_emacs;
+            }
+        }
+    }
+
     # check source header
     my $i = 0;
     if ($data[$i] =~ m!// -.*- mode:!) { ++$i; } # skip emacs mode line
@@ -201,7 +257,7 @@ sub process_cpp {
                 splice(@uncrust, $i+1, 0, "\n");
                 ++$namespace;
             }
-            if ($uncrust[$i] =~ m!^} // namespace!) {
+            if ($uncrust[$i] =~ m!^} +// namespace!) {
                 splice(@uncrust, $i, 0, "\n"); ++$i;
                 --$namespace;
             }
@@ -209,18 +265,18 @@ sub process_cpp {
         if ($namespace != 0) {
             print "$path\n";
             print "    NAMESPACE MISMATCH!\n";
-            #system("emacsclient -n $path");
+            system("emacsclient -n $path") if $launch_emacs;
         }
 
-        if (!(@data ~~ @uncrust)) {
+        if (!array_equal(\@data,\@uncrust)) {
             print "$path\n";
             print diff(\@data, \@uncrust);
             @data = @uncrust;
-            #system("emacsclient -n $path");
+            system("emacsclient -n $path") if $launch_emacs;
         }
     }
 
-    if ($write_changes)
+    if ($write_changes && !array_equal(\@data,\@origdata))
     {
         open(F, "> $path") or die("Cannot write $path: $!");
         print(F join("", @data));
diff --git a/misc/cmake/GetGitRevisionDescription.cmake b/misc/cmake/GetGitRevisionDescription.cmake
deleted file mode 100644
index 1bf0230..0000000
--- a/misc/cmake/GetGitRevisionDescription.cmake
+++ /dev/null
@@ -1,123 +0,0 @@
-# - Returns a version string from Git
-#
-# These functions force a re-configure on each git commit so that you can
-# trust the values of the variables in your build system.
-#
-#  get_git_head_revision(<refspecvar> <hashvar> [<additional arguments to git describe> ...])
-#
-# Returns the refspec and sha hash of the current head revision
-#
-#  git_describe(<var> [<additional arguments to git describe> ...])
-#
-# Returns the results of git describe on the source tree, and adjusting
-# the output so that it tests false if an error occurs.
-#
-#  git_get_exact_tag(<var> [<additional arguments to git describe> ...])
-#
-# Returns the results of git describe --exact-match on the source tree,
-# and adjusting the output so that it tests false if there was no exact
-# matching tag.
-#
-# Requires CMake 2.6 or newer (uses the 'function' command)
-#
-# Original Author:
-# 2009-2010 Ryan Pavlik <rpavlik at iastate.edu> <abiryan at ryand.net>
-# http://academic.cleardefinition.com
-# Iowa State University HCI Graduate Program/VRAC
-#
-# Copyright Iowa State University 2009-2010.
-# Distributed under the Boost Software License, Version 1.0.
-# (See accompanying file LICENSE_1_0.txt or copy at
-# http://www.boost.org/LICENSE_1_0.txt)
-
-if(__get_git_revision_description)
-	return()
-endif()
-set(__get_git_revision_description YES)
-
-# We must run the following at "include" time, not at function call time,
-# to find the path to this module rather than the path to a calling list file
-get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH)
-
-function(get_git_head_revision _refspecvar _hashvar)
-	set(GIT_PARENT_DIR "${CMAKE_SOURCE_DIR}")
-	set(GIT_DIR "${GIT_PARENT_DIR}/.git")
-	while(NOT EXISTS "${GIT_DIR}")	# .git dir not found, search parent directories
-		set(GIT_PREVIOUS_PARENT "${GIT_PARENT_DIR}")
-		get_filename_component(GIT_PARENT_DIR ${GIT_PARENT_DIR} PATH)
-		if(GIT_PARENT_DIR STREQUAL GIT_PREVIOUS_PARENT)
-			# We have reached the root directory, we are not in git
-			set(${_refspecvar} "GITDIR-NOTFOUND" PARENT_SCOPE)
-			set(${_hashvar} "GITDIR-NOTFOUND" PARENT_SCOPE)
-			return()
-		endif()
-		set(GIT_DIR "${GIT_PARENT_DIR}/.git")
-	endwhile()
-	set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data")
-	if(NOT EXISTS "${GIT_DATA}")
-		file(MAKE_DIRECTORY "${GIT_DATA}")
-	endif()
-
-	if(NOT EXISTS "${GIT_DIR}/HEAD")
-		return()
-	endif()
-	set(HEAD_FILE "${GIT_DATA}/HEAD")
-	configure_file("${GIT_DIR}/HEAD" "${HEAD_FILE}" COPYONLY)
-
-	configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in"
-		"${GIT_DATA}/grabRef.cmake"
-		@ONLY)
-	include("${GIT_DATA}/grabRef.cmake")
-
-	set(${_refspecvar} "${HEAD_REF}" PARENT_SCOPE)
-	set(${_hashvar} "${HEAD_HASH}" PARENT_SCOPE)
-endfunction()
-
-function(git_describe _var)
-	if(NOT GIT_FOUND)
-		find_package(Git QUIET)
-	endif()
-	get_git_head_revision(refspec hash)
-	if(NOT GIT_FOUND)
-		set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
-		return()
-	endif()
-	if(NOT hash)
-		set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE)
-		return()
-	endif()
-
-	# TODO sanitize
-	#if((${ARGN}" MATCHES "&&") OR
-	#	(ARGN MATCHES "||") OR
-	#	(ARGN MATCHES "\\;"))
-	#	message("Please report the following error to the project!")
-	#	message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}")
-	#endif()
-
-	#message(STATUS "Arguments to execute_process: ${ARGN}")
-
-	execute_process(COMMAND
-		"${GIT_EXECUTABLE}"
-		describe
-		${hash}
-		${ARGN}
-		WORKING_DIRECTORY
-		"${CMAKE_SOURCE_DIR}"
-		RESULT_VARIABLE
-		res
-		OUTPUT_VARIABLE
-		out
-		ERROR_QUIET
-		OUTPUT_STRIP_TRAILING_WHITESPACE)
-	if(NOT res EQUAL 0)
-		set(out "${out}-${res}-NOTFOUND")
-	endif()
-
-	set(${_var} "${out}" PARENT_SCOPE)
-endfunction()
-
-function(git_get_exact_tag _var)
-	git_describe(out --exact-match ${ARGN})
-	set(${_var} "${out}" PARENT_SCOPE)
-endfunction()
diff --git a/misc/cmake/GetGitRevisionDescription.cmake.in b/misc/cmake/GetGitRevisionDescription.cmake.in
deleted file mode 100644
index 888ce13..0000000
--- a/misc/cmake/GetGitRevisionDescription.cmake.in
+++ /dev/null
@@ -1,38 +0,0 @@
-# 
-# Internal file for GetGitRevisionDescription.cmake
-#
-# Requires CMake 2.6 or newer (uses the 'function' command)
-#
-# Original Author:
-# 2009-2010 Ryan Pavlik <rpavlik at iastate.edu> <abiryan at ryand.net>
-# http://academic.cleardefinition.com
-# Iowa State University HCI Graduate Program/VRAC
-#
-# Copyright Iowa State University 2009-2010.
-# Distributed under the Boost Software License, Version 1.0.
-# (See accompanying file LICENSE_1_0.txt or copy at
-# http://www.boost.org/LICENSE_1_0.txt)
-
-set(HEAD_HASH)
-
-file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)
-
-string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)
-if(HEAD_CONTENTS MATCHES "ref")
-	# named branch
-	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
-	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
-		configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
-	elseif(EXISTS "@GIT_DIR@/logs/${HEAD_REF}")
-		configure_file("@GIT_DIR@/logs/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
-		set(HEAD_HASH "${HEAD_REF}")
-	endif()
-else()
-	# detached HEAD
-	configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY)
-endif()
-
-if(NOT HEAD_HASH)
-	file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024)
-	string(STRIP "${HEAD_HASH}" HEAD_HASH)
-endif()
diff --git a/misc/do-release.txt b/misc/do-release.txt
index 8a7ccda..1d18f7d 100644
--- a/misc/do-release.txt
+++ b/misc/do-release.txt
@@ -26,14 +26,15 @@
 * Release Procedure
 
 ** Update CHANGELOG -> change from (unreleased) to (release date)
+   [file:../CHANGELOG]
 
 ** Update the following files with the correct version number:
    [file:../CMakeLists.txt] -> STXXL_VERSION_*
    [file:../Doxyfile] -> PROJECT_NUMBER
 
 ** Git commit and add signed tag
-   git commit -am "Finalizing release 1.4.x"
-   git tag -s -a 1.4.x
+   git commit -am "stxxl-1.4.x"
+   git tag -s -a 1.4.x -m "Final release 1.4.x"
    git push
    git push --tags
 
@@ -48,12 +49,16 @@
    # move stxxl-1.4.x.tar.gz out of build
 
 ** Generate stand-alone doxygen tarball:
-   # check doxygen version!
+   # check doxygen version! -> 1.8.5
+   doxygen --version
    git reset --hard master
    git clean -d -f -x
    mv include/stxxl/bits/config.h.in include/stxxl/bits/config.h
    doxygen
+   # optimize pngs
    optipng -o7 doxygen-html/*.png
+   # tag HTML with version and link
+   sed -i "s@<li class=\"footer\">Generated@<li class=\"footer\"><a href=\"http://stxxl.sourceforge.net\">STXXL 1.4.x</a> - Generated@" doxygen-html/*.html
    tar czf stxxl-1.4.x-doxygen.tar.gz doxygen-html
    # check tarball
    tar tvvzf stxxl-1.4.x-doxygen.tar.gz
@@ -65,17 +70,19 @@
 
 ** Let website generator create tag/master for current git repository.
    - copy tag/master into git website repository
+   - optipng -o7 tag/1.4.x/*.png
 
 * Post-Release Procedure
 
 ** Update CHANGELOG -> change to next version 1.4.99 (unreleased)
+   [file:../CHANGELOG]
 
 ** Update the following files with the new prerelease version number:
    [file:../CMakeLists.txt] -> STXXL_VERSION_*
    [file:../Doxyfile] -> PROJECT_NUMBER
 
 ** Git commit and add signed tag
-   git commit -am "Tagging trunk with prerelease 1.4.99"
+   git commit -am "Tagging trunk with unreleased 1.4-dev"
 
 * Announcements
 
diff --git a/misc/uncrustify.cfg b/misc/uncrustify.cfg
index 0c0336b..f2d420f 100644
--- a/misc/uncrustify.cfg
+++ b/misc/uncrustify.cfg
@@ -1237,7 +1237,7 @@ pos_comma                                = trail    # ignore/join/lead/lead_brea
 pos_class_comma                          = trail    # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force
 
 # The position of colons between constructor and member initialization
-pos_class_colon                          = ignore   # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force
+pos_class_colon                          = lead_force   # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force
 
 #
 # Line Splitting options
@@ -1260,7 +1260,7 @@ ls_code_width                            = false    # false/true
 #
 
 # The maximum consecutive newlines
-nl_max                                   = 3        # number
+nl_max                                   = 2        # number
 
 # The number of newlines after a function prototype, if followed by another function prototype
 nl_after_func_proto                      = 0        # number
diff --git a/tests/algo/CMakeLists.txt b/tests/algo/CMakeLists.txt
index 88c7fbe..7db0ac8 100644
--- a/tests/algo/CMakeLists.txt
+++ b/tests/algo/CMakeLists.txt
@@ -43,6 +43,7 @@ if(NOT CYGWIN AND NOT MINGW) #-tb too big to build on cygwin
     # requires /bigobj flag to build
     set_target_properties(test_ksort_all_parameters PROPERTIES COMPILE_FLAGS /bigobj)
     set_target_properties(test_sort_all_parameters PROPERTIES COMPILE_FLAGS /bigobj)
+    set_target_properties(test_stable_ksort_all_parameters PROPERTIES COMPILE_FLAGS /bigobj)
   endif()
 
   ### extra sort tests:
diff --git a/tests/algo/test_asch.cpp b/tests/algo/test_asch.cpp
index 5104c35..0022d54 100644
--- a/tests/algo/test_asch.cpp
+++ b/tests/algo/test_asch.cpp
@@ -18,7 +18,6 @@
 
 // Test async schedule algorithm
 
-
 int main(int argc, char* argv[])
 {
     if (argc < 5)
@@ -34,11 +33,9 @@ int main(int argc, char* argv[])
     stxxl::int_type* prefetch_order = new stxxl::int_type[L];
     int* count = new int[D];
 
-
     for (int i = 0; i < D; i++)
         count[i] = 0;
 
-
     stxxl::random_number32 rnd;
     for (int i = 0; i < L; i++)
     {
diff --git a/tests/algo/test_bad_cmp.cpp b/tests/algo/test_bad_cmp.cpp
index 06015dc..9fe2d03 100644
--- a/tests/algo/test_bad_cmp.cpp
+++ b/tests/algo/test_bad_cmp.cpp
@@ -19,20 +19,15 @@
 #include <stxxl/sort>
 #include <stxxl/vector>
 
-
 struct my_type
 {
     typedef unsigned key_type;
 
-    key_type _key;
-    key_type _data;
-    key_type key() const
-    {
-        return _key;
-    }
+    key_type m_key;
+    key_type m_data;
 
     my_type() { }
-    my_type(key_type __key) : _key(__key), _data(0) { }
+    my_type(key_type k) : m_key(k), m_data(0) { }
 
     static my_type min_value()
     {
@@ -48,23 +43,23 @@ struct my_type
 
 std::ostream& operator << (std::ostream& o, const my_type& obj)
 {
-    o << obj._key;
+    o << obj.m_key;
     return o;
 }
 
 bool operator < (const my_type& a, const my_type& b)
 {
-    return a.key() < b.key();
+    return a.m_key < b.m_key;
 }
 
 bool operator == (const my_type& a, const my_type& b)
 {
-    return a.key() == b.key();
+    return a.m_key == b.m_key;
 }
 
 bool operator != (const my_type& a, const my_type& b)
 {
-    return a.key() != b.key();
+    return a.m_key != b.m_key;
 }
 
 struct cmp : public std::less<my_type>
@@ -79,7 +74,6 @@ struct cmp : public std::less<my_type>
     }
 };
 
-
 int main(int argc, char* argv[])
 {
     const stxxl::int_type SIZE = (argc >= 2) ? atoi(argv[1]) : 16;
@@ -99,8 +93,8 @@ int main(int argc, char* argv[])
 
     STXXL_MSG("Filling vector with min_value..., input size = " << v.size() << " elements (" << ((v.size() * sizeof(my_type)) >> 20) << " MiB)");
     for (vector_type::size_type i = 0; i < v.size(); i++) {
-        v[i]._key = 0;
-        v[i]._data = (int)(i + 1);
+        v[i].m_key = 0;
+        v[i].m_data = (int)(i + 1);
     }
 
     STXXL_MSG("Checking order...");
@@ -114,11 +108,11 @@ int main(int argc, char* argv[])
 
     aliens = not_stable = 0;
     for (vector_type::size_type i = 0; i < v.size(); i++) {
-        if (v[i]._data < 1)
+        if (v[i].m_data < 1)
             ++aliens;
-        else if (v[i]._data != i + 1)
+        else if (v[i].m_data != i + 1)
             ++not_stable;
-        v[i]._data = (int)(i + 1);
+        v[i].m_data = (int)(i + 1);
     }
     STXXL_MSG("elements that were not in the input:     " << aliens);
     STXXL_MSG("elements not on their expected location: " << not_stable);
@@ -131,19 +125,19 @@ int main(int argc, char* argv[])
 
     aliens = not_stable = 0;
     for (vector_type::size_type i = 0; i < v.size(); i++) {
-        if (v[i]._data < 1)
+        if (v[i].m_data < 1)
             ++aliens;
-        else if (v[i]._data != i + 1)
+        else if (v[i].m_data != i + 1)
             ++not_stable;
-        v[i]._data = (int)(i + 1);
+        v[i].m_data = (int)(i + 1);
     }
     STXXL_MSG("elements that were not in the input:     " << aliens);
     STXXL_MSG("elements not on their expected location: " << not_stable);
 
     STXXL_MSG("Filling vector with max_value..., input size = " << v.size() << " elements (" << ((v.size() * sizeof(my_type)) >> 20) << " MiB)");
     for (vector_type::size_type i = 0; i < v.size(); i++) {
-        v[i]._key = unsigned(-1);
-        v[i]._data = int(i + 1);
+        v[i].m_key = unsigned(-1);
+        v[i].m_data = int(i + 1);
     }
 
     STXXL_MSG("Sorting subset (using " << (memory_to_use >> 20) << " MiB of memory)...");
@@ -154,11 +148,11 @@ int main(int argc, char* argv[])
 
     aliens = not_stable = 0;
     for (vector_type::size_type i = 0; i < v.size(); i++) {
-        if (v[i]._data < 1)
+        if (v[i].m_data < 1)
             ++aliens;
-        else if (v[i]._data != i + 1)
+        else if (v[i].m_data != i + 1)
             ++not_stable;
-        v[i]._data = int(i + 1);
+        v[i].m_data = int(i + 1);
     }
     STXXL_MSG("elements that were not in the input:     " << aliens);
     STXXL_MSG("elements not on their expected location: " << not_stable);
diff --git a/tests/algo/test_ksort.cpp b/tests/algo/test_ksort.cpp
index 6480765..6f381b0 100644
--- a/tests/algo/test_ksort.cpp
+++ b/tests/algo/test_ksort.cpp
@@ -17,21 +17,21 @@
 #include <stxxl/ksort>
 #include <stxxl/vector>
 
-
 struct my_type
 {
     typedef stxxl::uint64 key_type1;
 
-    key_type1 _key;
-    key_type1 _key_copy;
-    char _data[32 - 2 * sizeof(key_type1)];
+    key_type1 m_key;
+    key_type1 m_key_copy;
+    char m_data[32 - 2 * sizeof(key_type1)];
+
     key_type1 key() const
     {
-        return _key;
+        return m_key;
     }
 
-    my_type() : _key(0), _key_copy(0) { }
-    my_type(key_type1 __key) : _key(__key), _key_copy(__key) { }
+    my_type() : m_key(0), m_key_copy(0) { }
+    my_type(key_type1 k) : m_key(k), m_key_copy(k) { }
 
     my_type min_value() const { return my_type(std::numeric_limits<key_type1>::min()); }
     my_type max_value() const { return my_type(std::numeric_limits<key_type1>::max()); }
@@ -39,7 +39,7 @@ struct my_type
 
 std::ostream& operator << (std::ostream& o, const my_type& obj)
 {
-    o << obj._key << " " << obj._key_copy;
+    o << obj.m_key << " " << obj.m_key_copy;
     return o;
 }
 
@@ -49,7 +49,7 @@ struct get_key
     my_type dummy;
     key_type operator () (const my_type& obj) const
     {
-        return obj._key;
+        return obj.m_key;
     }
     my_type min_value() const
     {
@@ -61,7 +61,6 @@ struct get_key
     }
 };
 
-
 bool operator < (const my_type& a, const my_type& b)
 {
     return a.key() < b.key();
@@ -90,8 +89,8 @@ int main()
     STXXL_MSG("Filling vector... ");
     for (vector_type::size_type i = 0; i < v.size(); i++)
     {
-        v[i]._key = rnd() + 1;
-        v[i]._key_copy = v[i]._key;
+        v[i].m_key = rnd() + 1;
+        v[i].m_key_copy = v[i].m_key;
     }
 
     STXXL_MSG("Checking order...");
@@ -107,14 +106,14 @@ int main()
     my_type prev;
     for (vector_type::size_type i = 0; i < v.size(); i++)
     {
-        if (v[i]._key != v[i]._key_copy)
+        if (v[i].m_key != v[i].m_key_copy)
         {
             STXXL_MSG("Bug at position " << i);
             abort();
         }
-        if (i > 0 && prev._key == v[i]._key)
+        if (i > 0 && prev.m_key == v[i].m_key)
         {
-            STXXL_MSG("Duplicate at position " << i << " key=" << v[i]._key);
+            STXXL_MSG("Duplicate at position " << i << " key=" << v[i].m_key);
             //abort();
         }
         prev = v[i];
diff --git a/tests/algo/test_ksort_all_parameters.cpp b/tests/algo/test_ksort_all_parameters.cpp
index bcd3b01..9053b47 100644
--- a/tests/algo/test_ksort_all_parameters.cpp
+++ b/tests/algo/test_ksort_all_parameters.cpp
@@ -22,14 +22,12 @@
 #define KEY_COMPARE
 #include "test_sort_all_parameters.h"
 
-
 #ifndef RECORD_SIZE
  #define RECORD_SIZE 128
 #endif
 
 #define MB (1024 * 1024)
 
-
 template <typename T, typename alloc_strategy_type, unsigned block_size>
 void test(stxxl::uint64 data_mem, unsigned memory_to_use)
 {
@@ -108,7 +106,7 @@ int main(int argc, char* argv[])
     int sort_mem = atoi(argv[2]) * MB;
     int strategy = atoi(argv[3]);
     int block_size = atoi(argv[4]);
-    stxxl::set_seed(strtoul(argv[5], NULL, 10));
+    stxxl::set_seed((unsigned)strtoul(argv[5], NULL, 10));
     STXXL_MSG("Seed " << stxxl::get_next_seed());
     stxxl::srandom_number32();
 
diff --git a/tests/algo/test_parallel_sort.cpp b/tests/algo/test_parallel_sort.cpp
index 11274cd..914afae 100644
--- a/tests/algo/test_parallel_sort.cpp
+++ b/tests/algo/test_parallel_sort.cpp
@@ -29,36 +29,36 @@
 #include <stxxl/scan>
 #include <stxxl/sort>
 
+using stxxl::unsigned_type;
 
 const unsigned long long megabyte = 1024 * 1024;
 
-//const int block_size = STXXL_DEFAULT_BLOCK_SIZE(my_type);
-const int block_size = 4 * megabyte;
+const int block_size = STXXL_DEFAULT_BLOCK_SIZE(my_type);
 
 #define RECORD_SIZE 20
 #define MAGIC 123
 
-stxxl::unsigned_type run_size;
-stxxl::unsigned_type buffer_size;
+unsigned_type run_size;
+unsigned_type buffer_size;
 
 struct my_type
 {
     typedef unsigned long long key_type;
 
-    key_type _key;
-    key_type _load;
-    char _data[RECORD_SIZE - 2 * sizeof(key_type)];
-    key_type key() const { return _key; }
+    key_type m_key;
+    key_type m_load;
+    char m_data[RECORD_SIZE - 2 * sizeof(key_type)];
+    key_type key() const { return m_key; }
 
     my_type() { }
-    my_type(key_type __key) : _key(__key) { }
-    my_type(key_type __key, key_type __load) : _key(__key), _load(__load) { }
+    my_type(key_type k) : m_key(k) { }
+    my_type(key_type k, key_type l) : m_key(k), m_load(l) { }
 
-    void operator = (const key_type& __key) { _key = __key; }
+    void operator = (const key_type& k) { m_key = k; }
     void operator = (const my_type& mt)
     {
-        _key = mt._key;
-        _load = mt._load;
+        m_key = mt.m_key;
+        m_load = mt.m_load;
     }
 };
 
@@ -76,7 +76,7 @@ inline bool operator == (const my_type& a, const my_type& b)
 
 inline std::ostream& operator << (std::ostream& o, const my_type& obj)
 {
-    o << obj._key << "/" << obj._load;
+    o << obj.m_key << "/" << obj.m_load;
     return o;
 }
 
@@ -88,17 +88,17 @@ struct cmp_less_key : public std::less<my_type>
 
 typedef stxxl::vector<my_type, 4, stxxl::lru_pager<8>, block_size, STXXL_DEFAULT_ALLOC_STRATEGY> vector_type;
 
-stxxl::unsigned_type checksum(vector_type& input)
+unsigned_type checksum(vector_type& input)
 {
-    stxxl::unsigned_type sum = 0;
+    unsigned_type sum = 0;
     for (vector_type::const_iterator i = input.begin(); i != input.end(); ++i)
-        sum += (*i)._key;
+        sum += (unsigned_type)((*i).m_key);
     return sum;
 }
 
 void linear_sort_normal(vector_type& input)
 {
-    stxxl::unsigned_type sum1 = checksum(input);
+    unsigned_type sum1 = checksum(input);
 
     stxxl::stats_data stats_begin(*stxxl::stats::get_instance());
     double start = stxxl::timestamp();
@@ -108,7 +108,7 @@ void linear_sort_normal(vector_type& input)
     double stop = stxxl::timestamp();
     std::cout << stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
 
-    stxxl::unsigned_type sum2 = checksum(input);
+    unsigned_type sum2 = checksum(input);
 
     std::cout << sum1 << " ?= " << sum2 << std::endl;
 
@@ -119,16 +119,15 @@ void linear_sort_normal(vector_type& input)
 
 void linear_sort_streamed(vector_type& input, vector_type& output)
 {
-    stxxl::unsigned_type sum1 = checksum(input);
+    unsigned_type sum1 = checksum(input);
 
     stxxl::stats_data stats_begin(*stxxl::stats::get_instance());
     double start = stxxl::timestamp();
 
-    typedef __typeof__ (stxxl::stream::streamify(input.begin(), input.end())) input_stream_type;
+    typedef stxxl::stream::streamify_traits<vector_type::iterator>::stream_type input_stream_type;
 
     input_stream_type input_stream = stxxl::stream::streamify(input.begin(), input.end());
 
-
     typedef cmp_less_key comparator_type;
     comparator_type cl;
 
@@ -142,7 +141,7 @@ void linear_sort_streamed(vector_type& input, vector_type& output)
     double stop = stxxl::timestamp();
     std::cout << stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
 
-    stxxl::unsigned_type sum2 = checksum(output);
+    unsigned_type sum2 = checksum(output);
 
     std::cout << sum1 << " ?= " << sum2 << std::endl;
     if (sum1 != sum2)
@@ -153,7 +152,6 @@ void linear_sort_streamed(vector_type& input, vector_type& output)
     std::cout << "Linear sorting streamed took " << (stop - start) << " seconds." << std::endl;
 }
 
-
 int main(int argc, const char** argv)
 {
     if (argc < 6) {
@@ -168,7 +166,7 @@ int main(int argc, const char** argv)
 #endif
     unsigned long megabytes_to_process = atoi(argv[1]);
     int p = atoi(argv[2]);
-    stxxl::unsigned_type memory_to_use = (stxxl::unsigned_type)atoi(argv[3]) * megabyte;
+    unsigned_type memory_to_use = (unsigned_type)(atoi(argv[3]) * megabyte);
     run_size = memory_to_use;
     buffer_size = memory_to_use / 16;
 #ifdef STXXL_PARALLEL_MODE
diff --git a/tests/algo/test_random_shuffle.cpp b/tests/algo/test_random_shuffle.cpp
index 640ab5d..f005e43 100644
--- a/tests/algo/test_random_shuffle.cpp
+++ b/tests/algo/test_random_shuffle.cpp
@@ -21,7 +21,6 @@
 #include <stxxl/vector>
 #include <stxxl/random_shuffle>
 
-
 template <typename type>
 struct counter
 {
@@ -57,7 +56,6 @@ void long_test()
     STXXL_MSG("Permute randomly...");
     stxxl::random_shuffle(STXXLVector.begin(), STXXLVector.end(), 1024 * 1024 * 128);
 
-
     STXXL_MSG("Begin: ");
     for (i = 0; i < 10; i++)
         STXXL_MSG(STXXLVector[i]);
diff --git a/tests/algo/test_scan.cpp b/tests/algo/test_scan.cpp
index 23d49dd..60d0576 100644
--- a/tests/algo/test_scan.cpp
+++ b/tests/algo/test_scan.cpp
@@ -22,7 +22,6 @@
 using stxxl::int64;
 using stxxl::timestamp;
 
-
 template <typename type>
 struct counter
 {
@@ -67,14 +66,12 @@ int main()
 
     stxxl::generate(v.begin(), v.end(), counter<int64>(), 4);
 
-
     STXXL_MSG("for_each_m ...");
     b = timestamp();
     stxxl::for_each_m(v.begin(), v.end(), square<int64>(), 4);
     e = timestamp();
     STXXL_MSG("for_each_m time: " << (e - b));
 
-
     STXXL_MSG("check");
     for (i = 0; i < v.size(); ++i)
     {
@@ -92,7 +89,6 @@ int main()
     e = timestamp();
     STXXL_MSG("generate: " << (e - b));
 
-
     STXXL_MSG("check");
     STXXL_CHECK2(v[0] == 0, "Error at position " << 0);
 
diff --git a/tests/algo/test_sort.cpp b/tests/algo/test_sort.cpp
index bccf63c..09c92f6 100644
--- a/tests/algo/test_sort.cpp
+++ b/tests/algo/test_sort.cpp
@@ -18,25 +18,24 @@
 #include <stxxl/sort>
 #include <stxxl/vector>
 
-
 #define RECORD_SIZE 8
 
 struct my_type
 {
     typedef unsigned key_type;
 
-    key_type _key;
-    char _data[RECORD_SIZE - sizeof(key_type)];
+    key_type m_key;
+    char m_data[RECORD_SIZE - sizeof(key_type)];
     key_type key() const
     {
-        return _key;
+        return m_key;
     }
 
     my_type() { }
-    my_type(key_type __key) : _key(__key)
+    my_type(key_type k) : m_key(k)
     {
 #if STXXL_WITH_VALGRIND
-        memset(_data, 0, sizeof(_data));
+        memset(m_data, 0, sizeof(m_data));
 #endif
     }
 
@@ -54,7 +53,7 @@ struct my_type
 
 std::ostream& operator << (std::ostream& o, const my_type& obj)
 {
-    o << obj._key;
+    o << obj.m_key;
     return o;
 }
 
@@ -85,7 +84,6 @@ struct cmp : public std::less<my_type>
     }
 };
 
-
 int main()
 {
 #if STXXL_PARALLEL_MULTIWAY_MERGE
@@ -114,7 +112,7 @@ int main()
     stxxl::random_number32 rnd;
     STXXL_MSG("Filling vector..., input size = " << v.size() << " elements (" << ((v.size() * sizeof(my_type)) >> 20) << " MiB)");
     for (vector_type::size_type i = 0; i < v.size(); i++)
-        v[i]._key = 1 + (rnd() % 0xfffffff);
+        v[i].m_key = 1 + (rnd() % 0xfffffff);
 
     STXXL_MSG("Checking order...");
     STXXL_CHECK(!stxxl::is_sorted(v.begin(), v.end(), cmp()));
@@ -125,7 +123,6 @@ int main()
     STXXL_MSG("Checking order...");
     STXXL_CHECK(stxxl::is_sorted(v.begin(), v.end(), cmp()));
 
-
     STXXL_MSG("Done, output size=" << v.size());
 
     return 0;
diff --git a/tests/algo/test_sort_all_parameters.cpp b/tests/algo/test_sort_all_parameters.cpp
index 266807f..91651de 100644
--- a/tests/algo/test_sort_all_parameters.cpp
+++ b/tests/algo/test_sort_all_parameters.cpp
@@ -21,14 +21,12 @@
 
 #include "test_sort_all_parameters.h"
 
-
 #ifndef RECORD_SIZE
  #define RECORD_SIZE 4
 #endif
 
 #define MB (1024 * 1024)
 
-
 template <typename T, typename alloc_strategy_type, unsigned block_size>
 void test(stxxl::uint64 data_mem, unsigned memory_to_use)
 {
@@ -104,7 +102,7 @@ int main(int argc, char* argv[])
     int sort_mem = atoi(argv[2]) * MB;
     int strategy = atoi(argv[3]);
     int block_size = atoi(argv[4]);
-    stxxl::set_seed(strtoul(argv[5], NULL, 10));
+    stxxl::set_seed((unsigned)strtoul(argv[5], NULL, 10));
     STXXL_MSG("Seed " << stxxl::get_next_seed());
     stxxl::srandom_number32();
 
diff --git a/tests/algo/test_sort_all_parameters.h b/tests/algo/test_sort_all_parameters.h
index 434d347..7834a1c 100644
--- a/tests/algo/test_sort_all_parameters.h
+++ b/tests/algo/test_sort_all_parameters.h
@@ -18,12 +18,12 @@
 template <unsigned n>
 struct bulk
 {
-    char _data[n];
+    char m_data[n];
 
     bulk()
     {
 #if STXXL_WITH_VALGRIND
-        memset(_data, 0, n);
+        memset(m_data, 0, n);
 #endif
     }
 };
@@ -37,16 +37,16 @@ struct my_type
 {
     typedef KEY key_type;
 
-    key_type _key;
-    bulk<SIZE - sizeof(key_type)> _data;
+    key_type m_key;
+    bulk<SIZE - sizeof(key_type)> m_data;
 
     my_type() { }
-    my_type(key_type __key) : _key(__key) { }
+    my_type(key_type k) : m_key(k) { }
 
 #ifdef KEY_COMPARE
     key_type key() const
     {
-        return _key;
+        return m_key;
     }
 #endif
 
@@ -63,11 +63,7 @@ struct my_type
 template <typename KEY, unsigned SIZE>
 std::ostream& operator << (std::ostream& o, const my_type<KEY, SIZE> obj)
 {
-#ifndef KEY_COMPARE
-    o << obj._key;
-#else
-    o << obj.key();
-#endif
+    o << obj.m_key;
     return o;
 }
 
@@ -76,19 +72,19 @@ std::ostream& operator << (std::ostream& o, const my_type<KEY, SIZE> obj)
 template <typename KEY, unsigned SIZE>
 bool operator < (const my_type<KEY, SIZE>& a, const my_type<KEY, SIZE>& b)
 {
-    return a._key < b._key;
+    return a.m_key < b.m_key;
 }
 
 template <typename KEY, unsigned SIZE>
 bool operator == (const my_type<KEY, SIZE>& a, const my_type<KEY, SIZE>& b)
 {
-    return a._key == b._key;
+    return a.m_key == b.m_key;
 }
 
 template <typename KEY, unsigned SIZE>
 bool operator != (const my_type<KEY, SIZE>& a, const my_type<KEY, SIZE>& b)
 {
-    return a._key != b._key;
+    return a.m_key != b.m_key;
 }
 
 template <typename T>
@@ -96,7 +92,7 @@ struct Cmp : public std::less<T>
 {
     bool operator () (const T& a, const T& b) const
     {
-        return a._key < b._key;
+        return a.m_key < b.m_key;
     }
 
     static T min_value()
diff --git a/tests/algo/test_stable_ksort.cpp b/tests/algo/test_stable_ksort.cpp
index c93c14a..a17f636 100644
--- a/tests/algo/test_stable_ksort.cpp
+++ b/tests/algo/test_stable_ksort.cpp
@@ -18,20 +18,20 @@
 #include <stxxl/ksort>
 #include <stxxl/vector>
 
-
 struct my_type
 {
     typedef unsigned key_type;
 
-    key_type _key;
-    char _data[128 - sizeof(key_type)];
+    key_type m_key;
+    char m_data[128 - sizeof(key_type)];
+
     key_type key() const
     {
-        return _key;
+        return m_key;
     }
 
     my_type() { }
-    my_type(key_type __key) : _key(__key) { }
+    my_type(key_type k) : m_key(k) { }
 
     static my_type min_value()
     {
@@ -61,7 +61,7 @@ int main()
     stxxl::random_number32 rnd;
     STXXL_MSG("Filling vector... " << rnd() << " " << rnd() << " " << rnd());
     for (vector_type::size_type i = 0; i < v.size(); i++)
-        v[i]._key = (rnd() / 2) * 2;
+        v[i].m_key = (rnd() / 2) * 2;
 
     STXXL_MSG("Checking order...");
     STXXL_CHECK(!stxxl::is_sorted(v.begin(), v.end()));
@@ -72,6 +72,5 @@ int main()
     STXXL_MSG("Checking order...");
     STXXL_CHECK(stxxl::is_sorted(v.begin(), v.end()));
 
-
     return 0;
 }
diff --git a/tests/algo/test_stable_ksort_all_parameters.cpp b/tests/algo/test_stable_ksort_all_parameters.cpp
index 55b1b20..2d9c105 100644
--- a/tests/algo/test_stable_ksort_all_parameters.cpp
+++ b/tests/algo/test_stable_ksort_all_parameters.cpp
@@ -21,14 +21,12 @@
 #define KEY_COMPARE
 #include "test_sort_all_parameters.h"
 
-
 #ifndef RECORD_SIZE
  #define RECORD_SIZE 128
 #endif
 
 #define MB (1024 * 1024)
 
-
 template <typename T, typename alloc_strategy_type, unsigned block_size>
 void test(stxxl::uint64 data_mem, unsigned memory_to_use)
 {
@@ -107,7 +105,7 @@ int main(int argc, char* argv[])
     int sort_mem = atoi(argv[2]) * MB;
     int strategy = atoi(argv[3]);
     int block_size = atoi(argv[4]);
-    stxxl::set_seed(strtoul(argv[5], NULL, 10));
+    stxxl::set_seed((unsigned)strtoul(argv[5], NULL, 10));
     STXXL_MSG("Seed " << stxxl::get_next_seed());
     stxxl::srandom_number32();
 
diff --git a/tests/common/CMakeLists.txt b/tests/common/CMakeLists.txt
index 57e1925..d4550d0 100644
--- a/tests/common/CMakeLists.txt
+++ b/tests/common/CMakeLists.txt
@@ -3,15 +3,19 @@
 #
 #  Part of the STXXL. See http://stxxl.sourceforge.net
 #
-#  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+#  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
 #
 #  Distributed under the Boost Software License, Version 1.0.
 #  (See accompanying file LICENSE_1_0.txt or copy at
 #  http://www.boost.org/LICENSE_1_0.txt)
 ############################################################################
 
+stxxl_build_test(test_binary_buffer)
 stxxl_build_test(test_cmdline)
 stxxl_build_test(test_counting_ptr)
+if(USE_BOOST)
+  stxxl_build_test(test_external_shared_ptr)
+endif(USE_BOOST)
 stxxl_build_test(test_globals)
 stxxl_build_test(test_log2)
 stxxl_build_test(test_manyunits test_manyunits2)
@@ -19,8 +23,12 @@ stxxl_build_test(test_random)
 stxxl_build_test(test_tuple)
 stxxl_build_test(test_uint_types)
 
+stxxl_test(test_binary_buffer)
 stxxl_test(test_cmdline)
 stxxl_test(test_counting_ptr)
+if(USE_BOOST)
+  stxxl_test(test_external_shared_ptr)
+endif(USE_BOOST)
 stxxl_test(test_globals)
 stxxl_test(test_log2)
 stxxl_test(test_manyunits)
diff --git a/tests/common/test_binary_buffer.cpp b/tests/common/test_binary_buffer.cpp
new file mode 100644
index 0000000..ffddde6
--- /dev/null
+++ b/tests/common/test_binary_buffer.cpp
@@ -0,0 +1,90 @@
+/***************************************************************************
+ *  tests/common/test_binary_buffer.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <stxxl/bits/common/binary_buffer.h>
+#include <stxxl/bits/verbose.h>
+
+void test1()   // Builds a binary_buffer, compares it with hand-written bytes, then decodes it again via binary_reader.
+{
+//! [serialize]
+    // construct a binary blob
+    stxxl::binary_buffer bb;
+    {
+        bb.put<unsigned int>(1);
+        bb.put_string("test");
+
+        bb.put_varint(42);
+        bb.put_varint(12345678);
+
+        // add a sub block
+        stxxl::binary_buffer sub;
+        sub.put_string("sub block");
+        sub.put_varint(6 * 9);
+
+        bb.put_string(sub);   // the expected bytes below show the sub buffer stored behind a length prefix
+    }
+//! [serialize]
+
+    // read binary block and verify content against a hand-written expected encoding
+
+    stxxl::binary_buffer_ref bbr = bb;
+
+    const unsigned char bb_data[] = {   // expected serialization of bb, one group per put call above
+        // bb.put<unsigned int>(1): the expectation assumes a 4-byte unsigned int, least significant byte first
+        0x01, 0x00, 0x00, 0x00,
+        // bb.put_string("test"): length byte 0x04 followed by the characters 't' 'e' 's' 't'
+        0x04, 0x74, 0x65, 0x73, 0x74,
+        // bb.put_varint(42): fits into a single byte
+        0x2a,
+        // bb.put_varint(12345678): four bytes, 7 payload bits each, low bits first, top bit set on all but the last
+        0xce, 0xc2, 0xf1, 0x05,
+        // begin sub block (length): 0x0b = 11 bytes of sub content follow
+        0x0b,
+        // sub.put_string("sub block"): length byte 0x09 followed by the nine characters
+        0x09, 0x73, 0x75, 0x62, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b,
+        // sub.put_varint(6 * 9): 54 = 0x36
+        0x36,
+    };
+
+    stxxl::binary_buffer_ref bb_verify(bb_data, sizeof(bb_data));
+
+    if (bbr != bb_verify)
+        std::cout << bbr.str();   // print the actual content first so that the failing check below can be diagnosed
+
+    STXXL_CHECK(bbr == bb_verify);
+
+//! [deserialize]
+    // read binary block using binary_reader; values are fetched in the order they were written
+
+    stxxl::binary_reader br(bb);
+
+    STXXL_CHECK(br.get<unsigned int>() == 1);
+    STXXL_CHECK(br.get_string() == "test");
+    STXXL_CHECK(br.get_varint() == 42);
+    STXXL_CHECK(br.get_varint() == 12345678);
+
+    {
+        stxxl::binary_reader sub_br = br.get_binary_buffer_ref();   // the nested block is decoded through its own reader
+        STXXL_CHECK(sub_br.get_string() == "sub block");
+        STXXL_CHECK(sub_br.get_varint() == 6 * 9);
+        STXXL_CHECK(sub_br.empty());   // nothing may be left over in the sub block
+    }
+
+    STXXL_CHECK(br.empty());   // the outer reader must be fully consumed as well
+//! [deserialize]
+}
+
+int main(int, char**)   // Test entry point; both command line parameters are unnamed and unused.
+{
+    test1();
+    return 0;
+}
diff --git a/tests/common/test_external_shared_ptr.cpp b/tests/common/test_external_shared_ptr.cpp
new file mode 100644
index 0000000..7b260ee
--- /dev/null
+++ b/tests/common/test_external_shared_ptr.cpp
@@ -0,0 +1,291 @@
+/***************************************************************************
+ *  tests/common/test_external_shared_ptr.cpp
+ *
+ *  This file has been derived from the following tests written
+ *  by Roman Dementiev:
+ *     - test_vector.cpp
+ *     - test_map.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2011 Daniel Godas-Lopez <dgodas at gmail.com>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <iostream>
+#include <algorithm>
+#include <cmath>
+#include <stxxl/vector>
+#include <stxxl/scan>
+#include <stxxl/map>
+#include <stxxl/stats>
+
+#include <stxxl/bits/common/external_shared_ptr.h>
+#include <boost/make_shared.hpp>
+#include <boost/shared_ptr.hpp>
+
+struct actual_element   // pointee used by the vector test: three int64 fields = 24 bytes, not a power of 2 intentionally
+{
+    stxxl::int64 key;
+    stxxl::int64 load0;
+    stxxl::int64 load1;
+
+    actual_element& operator = (stxxl::int64 i)   // sets all three fields from a single integer
+    {
+        key = i;
+        load0 = i + 42;
+        load1 = i ^ 42;
+        return *this;
+    }
+
+    bool operator == (const actual_element& e2) const   // equal only if all three fields match
+    {
+        return key == e2.key && load0 == e2.load0 && load1 == e2.load1;
+    }
+};
+
+typedef boost::shared_ptr<actual_element> actual_element_ptr;
+typedef stxxl::external_shared_ptr<actual_element_ptr> element;
+
+struct counter   // generator functor yielding consecutive ints; not referenced anywhere else in this test file
+{
+    int value;   // next value to hand out
+    counter(int v) : value(v) { }
+    int operator () ()   // returns the current value, then advances by one
+    {
+        int old_val = value;
+        value++;
+        return old_val;
+    }
+};
+
+template <class my_vec_type>
+void test_const_iterator(const my_vec_type& x)   // calls the const_iterator operations once each; no result is checked
+{
+    typename my_vec_type::const_iterator i = x.begin();
+    i = x.end() - 1;
+    i.block_externally_updated();
+    i.flush();
+    i++;   // the two increments step to end() and one beyond it ...
+    ++i;
+    --i;   // ... and the two decrements lead back to end() - 1
+    i--;
+    *i;   // dereference at end() - 1; the value is discarded
+}
+
+void test_vector()   // Stores external_shared_ptr wrappers in an stxxl vector and checks them after write, swap, clear/resize and copy.
+{
+    // use non-randomized striping to avoid side effects on random generator
+    typedef stxxl::VECTOR_GENERATOR<element, 2, 2, (2* 1024* 1024), stxxl::striping>::result vector_type;
+    vector_type v(64 * 1024 * 1024 / sizeof(element));   // as many elements as fit into 64 MiB
+
+    // test assignment const_iterator = iterator
+    vector_type::const_iterator c_it = v.begin();
+    STXXL_UNUSED(c_it);
+
+    test_const_iterator(v);
+
+    stxxl::random_number32 rnd;
+    int offset = rnd();   // drawn before the seed is fixed below
+
+    STXXL_MSG("write " << v.size() << " elements");
+
+    stxxl::ran32State = 0xdeadbeef;   // fixed seed; NOTE(review): the read-back below relies on rnd() drawing from ran32State - confirm
+    vector_type::size_type i;
+
+    // fill the vector with increasing sequence of integer numbers
+    for (i = 0; i < v.size(); ++i)
+    {
+        actual_element_ptr aep(boost::make_shared<actual_element>());
+        aep->key = i + offset;
+        element e(aep);
+
+        v[i] = e;
+
+        STXXL_CHECK(v[i].get()->key == stxxl::int64(i + offset));
+    }
+
+    // fill the vector with random numbers, replacing the entries written by the first loop
+    for (i = 0; i < v.size(); ++i)
+    {
+        actual_element_ptr aep(boost::make_shared<actual_element>());
+        aep->key = rnd();
+        element e(aep);
+
+        v[i].unwrap();   // presumably releases the previously stored pointer before it is overwritten - confirm in external_shared_ptr.h
+        v[i] = e;
+
+        STXXL_CHECK(v[i].get()->key == aep->key);
+    }
+    v.flush();
+
+    STXXL_MSG("seq read of " << v.size() << " elements");
+
+    stxxl::ran32State = 0xdeadbeef;   // same seed as before the random fill, so rnd() is expected to repeat those keys
+
+    // testing swap: swapping twice leaves v holding its original content
+    vector_type a;
+    std::swap(v, a);
+    std::swap(v, a);
+
+    for (i = 0; i < v.size(); i++)
+        STXXL_CHECK(v[i].get()->key == rnd());
+
+    // check again after clear and resize: unwrap every element, then clear the vector
+    STXXL_MSG("clear");
+
+    for (vector_type::iterator it = v.begin(); it != v.end(); ++it)
+        it->unwrap();
+
+    v.clear();
+
+    stxxl::ran32State = 0xdeadbeef + 10;   // different seed for the second round
+
+    v.resize(64 * 1024 * 1024 / sizeof(element));
+
+    STXXL_MSG("write " << v.size() << " elements");
+    for (i = 0; i < v.size(); ++i)
+    {
+        actual_element_ptr aep(boost::make_shared<actual_element>());
+        aep->key = rnd();
+        element e(aep);
+
+        v[i] = e;
+
+        STXXL_CHECK(v[i].get()->key == aep->key);
+    }
+
+    stxxl::ran32State = 0xdeadbeef + 10;   // reset to the second-round seed for the read-back
+
+    STXXL_MSG("seq read of " << v.size() << " elements");
+
+    for (i = 0; i < v.size(); i++)
+        STXXL_CHECK(v[i].get()->key == rnd());
+
+    STXXL_MSG("copy vector of " << v.size() << " elements");
+
+    vector_type v_copy0(v);   // copy construction
+    STXXL_CHECK(v == v_copy0);
+
+    vector_type v_copy1;   // copy assignment
+    v_copy1 = v;
+    STXXL_CHECK(v == v_copy1);
+
+    while (v.size() != 0) {   // drain v from the back, unwrapping each removed element
+        element e = v.back();
+        v.pop_back();
+        e.unwrap();
+    }
+}
+
+typedef size_t key_type;
+
+struct test_data {   // pointee used by the map test; test_map() derives every field from the map key
+    unsigned char a;   // written as (unsigned char)(key + 1)
+    unsigned long b[3];   // each entry written as (unsigned long)(key + 2)
+    unsigned int c;   // written as (unsigned int)(key + 3)
+};
+
+typedef boost::shared_ptr<test_data> test_data_ptr;
+typedef stxxl::external_shared_ptr<test_data_ptr> data_type;
+
+struct cmp : public std::less<key_type>   // comparator passed to map_type: std::less plus min_value()/max_value()
+{
+    static key_type min_value()   // smallest representable key
+    {
+        return (std::numeric_limits<key_type>::min)();   // parenthesised name, presumably so a min() macro cannot expand here - confirm
+    }
+    static key_type max_value()   // largest representable key
+    {
+        return (std::numeric_limits<key_type>::max)();   // same parenthesised form as in min_value()
+    }
+};
+
+#define BLOCK_SIZE (32 * 1024)   // 32 KiB; used for both block-size parameters of map_type
+#define CACHE_SIZE (2 * 1024 * 1024 / BLOCK_SIZE)   // number of blocks in 2 MiB, i.e. 64
+
+#define CACHE_ELEMENTS (BLOCK_SIZE * CACHE_SIZE / (sizeof(key_type) + sizeof(data_type)))   // key/data pairs per 2 MiB
+
+typedef stxxl::map<key_type, data_type, cmp, BLOCK_SIZE, BLOCK_SIZE> map_type;   // external map under test
+
+void test_map()   // Fills an stxxl::map with external_shared_ptr values, looks them up at random, then drains the map; logs I/O statistics.
+{
+    const unsigned max_mult = 8;   // exclusive bound: the loop below runs with mult = 1, 2 and 4
+
+    stxxl::stats_data stats_begin(*stxxl::stats::get_instance());
+    stxxl::stats_data stats_elapsed;
+    STXXL_MSG(stats_begin);
+
+    STXXL_MSG("Block size " << BLOCK_SIZE / 1024 << " KiB");
+    STXXL_MSG("Cache size " << (CACHE_SIZE * BLOCK_SIZE) / 1024 << " KiB");
+
+    for (unsigned mult = 1; mult < max_mult; mult *= 2)
+    {
+        stats_begin = *stxxl::stats::get_instance();   // snapshot taken before the insert phase
+        const size_t el = mult * (CACHE_ELEMENTS / 8);   // number of entries inserted in this round
+        STXXL_MSG("Elements to insert " << el << " volume =" <<
+                  (el * (sizeof(key_type) + sizeof(data_type))) / 1024 << " KiB");
+        map_type* DMap = new map_type(CACHE_SIZE * BLOCK_SIZE / 2, CACHE_SIZE * BLOCK_SIZE / 2);   // each argument gets half of the byte budget
+        map_type& Map = *DMap;
+
+        for (size_t i = 0; i < el; ++i)   // insert keys 0 .. el-1
+        {
+            test_data_ptr test = boost::make_shared<test_data>();
+
+            test->a = (unsigned char)(i + 1);   // all fields are derived from the key so that the lookup phase can recompute them
+            for (unsigned j = 0; j < 3; j++)
+                test->b[j] = (unsigned long)(i + 2);
+            test->c = (unsigned int)(i + 3);
+
+            data_type data(test);
+            Map[i] = data;
+        }
+        stats_elapsed = stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
+        double writes = double(stats_elapsed.get_writes()) / double(el);   // writes per inserted element
+        double logel = log(double(el)) / log(double(BLOCK_SIZE));   // logarithm of el to base BLOCK_SIZE
+        STXXL_MSG("Logs: writes " << writes << " logel " << logel << " writes/logel " << (writes / logel));
+        STXXL_MSG(stats_elapsed);
+
+        stats_begin = *stxxl::stats::get_instance();   // fresh snapshot for the lookup phase
+        STXXL_MSG("Doing search");
+        size_t queries = el;   // as many lookups as there are entries
+        stxxl::random_number32 myrandom;
+        for (unsigned i = 0; i < queries; ++i)
+        {
+            key_type key = myrandom() % el;   // always within [0, el), i.e. a key that was inserted above
+            map_type::iterator result = Map.find(key);
+
+            data_type data = (*result).second;   // dereferenced without an end() check; every queried key was inserted
+            test_data_ptr tmp = data.get();
+
+            STXXL_CHECK(tmp->a == (unsigned char)(key + 1));
+            for (unsigned j = 0; j < 3; ++j)
+                STXXL_CHECK(tmp->b[j] == (unsigned long)(key + 2));
+            STXXL_CHECK(tmp->c == (unsigned int)(key + 3));
+        }
+        stats_elapsed = stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
+        double reads = double(stats_elapsed.get_reads()) / logel;
+        double readsperq = double(stats_elapsed.get_reads()) / (double)queries;   // reads per query
+        STXXL_MSG("reads/logel " << reads << " readsperq " << readsperq);
+        STXXL_MSG(stats_elapsed);
+
+        while (Map.size() != 0) {   // drain the map: copy the value out, erase the entry, then unwrap the copy
+            map_type::iterator it = Map.begin();
+            data_type data = (*it).second;
+            Map.erase(it);
+            data.unwrap();
+        }
+
+        delete DMap;   // the map is heap-allocated once per round and released here
+    }
+}
+
+int main()   // Test entry point: runs the vector test first, then the map test.
+{
+    test_vector();
+    test_map();
+    return 0;
+}
diff --git a/tests/containers/CMakeLists.txt b/tests/containers/CMakeLists.txt
index 8865c3f..ee911e5 100644
--- a/tests/containers/CMakeLists.txt
+++ b/tests/containers/CMakeLists.txt
@@ -11,6 +11,7 @@
 ############################################################################
 
 add_subdirectory(btree)
+add_subdirectory(hash_map)
 
 stxxl_build_test(test_deque)
 stxxl_build_test(test_ext_merger)
@@ -61,7 +62,7 @@ endif(USE_BOOST)
 stxxl_build_test(test_map)
 stxxl_build_test(test_map_random)
 
-stxxl_test(test_map 12)
+stxxl_test(test_map 8)
 stxxl_test(test_map_random 2000)
 
 #-tb longer test for map
diff --git a/tests/containers/btree/CMakeLists.txt b/tests/containers/btree/CMakeLists.txt
index b46df1a..c2271f9 100644
--- a/tests/containers/btree/CMakeLists.txt
+++ b/tests/containers/btree/CMakeLists.txt
@@ -3,7 +3,7 @@
 #
 #  Part of the STXXL. See http://stxxl.sourceforge.net
 #
-#  Copyright (C) 2013 Timo Bingmann <tb at panthema.net>
+#  Copyright (C) 2013-2014 Timo Bingmann <tb at panthema.net>
 #
 #  Distributed under the Boost Software License, Version 1.0.
 #  (See accompanying file LICENSE_1_0.txt or copy at
@@ -11,17 +11,17 @@
 ############################################################################
 
 stxxl_build_test(test_btree)
-stxxl_build_test(test_const_scan)
-stxxl_build_test(test_corr_insert_erase)
-stxxl_build_test(test_corr_insert_find)
-stxxl_build_test(test_corr_insert_scan)
+stxxl_build_test(test_btree_const_scan)
+stxxl_build_test(test_btree_insert_erase)
+stxxl_build_test(test_btree_insert_find)
+stxxl_build_test(test_btree_insert_scan)
 
 stxxl_test(test_btree 10000)
 stxxl_test(test_btree 100000)
 stxxl_test(test_btree 1000000)
-stxxl_test(test_const_scan 10000)
-stxxl_test(test_const_scan 100000)
-stxxl_test(test_const_scan 1000000)
-stxxl_test(test_corr_insert_erase 14)
-stxxl_test(test_corr_insert_find 14)
-stxxl_test(test_corr_insert_scan 14)
+stxxl_test(test_btree_const_scan 10000)
+stxxl_test(test_btree_const_scan 100000)
+stxxl_test(test_btree_const_scan 1000000)
+stxxl_test(test_btree_insert_erase 14)
+stxxl_test(test_btree_insert_find 14)
+stxxl_test(test_btree_insert_scan 14)
diff --git a/tests/containers/btree/test_btree.cpp b/tests/containers/btree/test_btree.cpp
index e0c96e2..6fca3f5 100644
--- a/tests/containers/btree/test_btree.cpp
+++ b/tests/containers/btree/test_btree.cpp
@@ -18,7 +18,6 @@
 #include <stxxl/stats>
 #include <stxxl/timer>
 
-
 struct comp_type : public std::less<int>
 {
     static int max_value()
@@ -55,7 +54,6 @@ int main(int argc, char* argv[])
     if (nins < 100)
         nins = 100;
 
-
     stxxl::random_number32 rnd;
 
     // .begin() .end() test
@@ -88,8 +86,7 @@ int main(int argc, char* argv[])
     it = BTree1.find(1000);
     STXXL_CHECK(it == BTree1.end());
 
-
-    stxxl::unsigned_type f = BTree1.erase(5);
+    btree_type::size_type f = BTree1.erase(5);
     STXXL_CHECK(f == 1);
     f = BTree1.erase(6);
     STXXL_CHECK(f == 0);
@@ -180,7 +177,6 @@ int main(int argc, char* argv[])
     }
     STXXL_MSG("Size of map: " << BTree1.size());
 
-
     BTree1.clear();
 
     for (unsigned int i = 0; i < nins / 2; ++i)
@@ -222,7 +218,6 @@ int main(int argc, char* argv[])
     STXXL_MSG("Size of Btree4 after erase: " << BTree4.size());
     STXXL_CHECK(BTree4.size() == 1);
 
-
     STXXL_MSG("Size of Btree1 before erase: " << BTree1.size());
     BTree1.erase(BTree1.begin(), BTree1.end());
     STXXL_MSG("Size of Btree1 after erase: " << BTree1.size());
@@ -264,7 +259,6 @@ int main(int argc, char* argv[])
 
     STXXL_CHECK(CBTree3.max_size() >= CBTree3.size());
 
-
     CBTree3.key_comp();
     CBTree3.value_comp();
 
diff --git a/tests/containers/btree/test_const_scan.cpp b/tests/containers/btree/test_btree_const_scan.cpp
similarity index 98%
rename from tests/containers/btree/test_const_scan.cpp
rename to tests/containers/btree/test_btree_const_scan.cpp
index d0d57d0..aa6d255 100644
--- a/tests/containers/btree/test_const_scan.cpp
+++ b/tests/containers/btree/test_btree_const_scan.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
- *  tests/containers/btree/test_const_scan.cpp
+ *  tests/containers/btree/test_btree_const_scan.cpp
  *
  *  Part of the STXXL. See http://stxxl.sourceforge.net
  *
@@ -15,7 +15,6 @@
 #include <stxxl/bits/containers/btree/btree.h>
 #include <stxxl/timer>
 
-
 struct comp_type : public std::less<int>
 {
     static int max_value()
@@ -113,7 +112,6 @@ int main(int argc, char* argv[])
         btree_type BTree1(Data.begin(), Data.end(), comp_type(), node_cache_size, leaf_cache_size, true);
         btree_type BTree2(Data.begin(), Data.end(), comp_type(), node_cache_size, leaf_cache_size, true);
 
-
         //STXXL_MSG(*stxxl::stats::get_instance());
 
         C(BTree1);
diff --git a/tests/containers/btree/test_corr_insert_erase.cpp b/tests/containers/btree/test_btree_insert_erase.cpp
similarity index 98%
rename from tests/containers/btree/test_corr_insert_erase.cpp
rename to tests/containers/btree/test_btree_insert_erase.cpp
index 0d774cf..a299ed6 100644
--- a/tests/containers/btree/test_corr_insert_erase.cpp
+++ b/tests/containers/btree/test_btree_insert_erase.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
- *  tests/containers/btree/test_corr_insert_erase.cpp
+ *  tests/containers/btree/test_btree_insert_erase.cpp
  *
  *  Part of the STXXL. See http://stxxl.sourceforge.net
  *
@@ -18,7 +18,6 @@
 #include <stxxl/sort>
 #include <stxxl/random_shuffle>
 
-
 struct comp_type : public std::less<int>
 {
     static int max_value()
@@ -40,7 +39,6 @@ std::ostream& operator << (std::ostream& o, const std::pair<int, double>& obj)
     return o;
 }
 
-
 struct rnd_gen
 {
     stxxl::random_number32 rnd;
@@ -75,7 +73,6 @@ int main(int argc, char* argv[])
 
     stxxl::ran32State = (unsigned int)time(NULL);
 
-
     stxxl::vector<int> Values(nins);
     STXXL_MSG("Generating " << nins << " random values");
     stxxl::generate(Values.begin(), Values.end(), rnd_gen(), 4);
@@ -95,7 +92,6 @@ int main(int argc, char* argv[])
     for ( ; it != Values.end(); ++it)
         BTree.insert(std::pair<int, double>(*it, double(*it) + 1.0));
 
-
     STXXL_MSG("Number of elements in btree: " << BTree.size());
 
     STXXL_MSG("Searching " << Values.size() << " existing elements and erasing them");
diff --git a/tests/containers/btree/test_corr_insert_find.cpp b/tests/containers/btree/test_btree_insert_find.cpp
similarity index 97%
rename from tests/containers/btree/test_corr_insert_find.cpp
rename to tests/containers/btree/test_btree_insert_find.cpp
index fe8bcc6..34315fb 100644
--- a/tests/containers/btree/test_corr_insert_find.cpp
+++ b/tests/containers/btree/test_btree_insert_find.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
- *  tests/containers/btree/test_corr_insert_find.cpp
+ *  tests/containers/btree/test_btree_insert_find.cpp
  *
  *  Part of the STXXL. See http://stxxl.sourceforge.net
  *
@@ -16,7 +16,6 @@
 #include <stxxl/bits/containers/btree/btree.h>
 #include <stxxl/scan>
 
-
 struct comp_type : public std::less<int>
 {
     static int max_value()
@@ -31,14 +30,12 @@ struct comp_type : public std::less<int>
 
 typedef stxxl::btree::btree<int, double, comp_type, 4096, 4096, stxxl::SR> btree_type;
 
-
 std::ostream& operator << (std::ostream& o, const std::pair<int, double>& obj)
 {
     o << obj.first << " " << obj.second;
     return o;
 }
 
-
 struct rnd_gen
 {
     stxxl::random_number32 rnd;
@@ -73,7 +70,6 @@ int main(int argc, char* argv[])
 
     stxxl::ran32State = (unsigned int)time(NULL);
 
-
     stxxl::vector<int> Values(nins);
     STXXL_MSG("Generating " << nins << " random values");
     stxxl::generate(Values.begin(), Values.end(), rnd_gen(), 4);
@@ -83,7 +79,6 @@ int main(int argc, char* argv[])
     for ( ; it != Values.end(); ++it)
         BTree.insert(std::pair<int, double>(*it, double(*it) + 1.0));
 
-
     STXXL_MSG("Number of elements in btree: " << BTree.size());
 
     STXXL_MSG("Searching " << nins << " existing elements");
diff --git a/tests/containers/btree/test_corr_insert_scan.cpp b/tests/containers/btree/test_btree_insert_scan.cpp
similarity index 98%
rename from tests/containers/btree/test_corr_insert_scan.cpp
rename to tests/containers/btree/test_btree_insert_scan.cpp
index 862bfe4..d2b283b 100644
--- a/tests/containers/btree/test_corr_insert_scan.cpp
+++ b/tests/containers/btree/test_btree_insert_scan.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
- *  tests/containers/btree/test_corr_insert_scan.cpp
+ *  tests/containers/btree/test_btree_insert_scan.cpp
  *
  *  Part of the STXXL. See http://stxxl.sourceforge.net
  *
@@ -17,7 +17,6 @@
 #include <stxxl/scan>
 #include <stxxl/sort>
 
-
 struct comp_type : public std::less<int>
 {
     static int max_value()
@@ -39,7 +38,6 @@ std::ostream& operator << (std::ostream& o, const std::pair<int, double>& obj)
     return o;
 }
 
-
 struct rnd_gen
 {
     stxxl::random_number32 rnd;
@@ -83,7 +81,6 @@ int main(int argc, char* argv[])
     for ( ; it != Values.end(); ++it)
         BTree.insert(std::pair<int, double>(*it, double(*it) + 1.0));
 
-
     STXXL_MSG("Sorting the random values");
     stxxl::sort(Values.begin(), Values.end(), comp_type(), 128 * 1024 * 1024);
 
diff --git a/tests/containers/hash_map/CMakeLists.txt b/tests/containers/hash_map/CMakeLists.txt
new file mode 100644
index 0000000..3a55ee3
--- /dev/null
+++ b/tests/containers/hash_map/CMakeLists.txt
@@ -0,0 +1,21 @@
+############################################################################
+#  tests/containers/hash_map/CMakeLists.txt
+#
+#  Part of the STXXL. See http://stxxl.sourceforge.net
+#
+#  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+#
+#  Distributed under the Boost Software License, Version 1.0.
+#  (See accompanying file LICENSE_1_0.txt or copy at
+#  http://www.boost.org/LICENSE_1_0.txt)
+############################################################################
+
+stxxl_build_test(test_hash_map)
+stxxl_build_test(test_hash_map_block_cache)
+stxxl_build_test(test_hash_map_iterators)
+stxxl_build_test(test_hash_map_reader_writer)
+
+stxxl_test(test_hash_map)
+stxxl_test(test_hash_map_block_cache)
+stxxl_test(test_hash_map_iterators)
+stxxl_test(test_hash_map_reader_writer)
diff --git a/tests/containers/hash_map/test_hash_map.cpp b/tests/containers/hash_map/test_hash_map.cpp
new file mode 100644
index 0000000..e26cfbf
--- /dev/null
+++ b/tests/containers/hash_map/test_hash_map.cpp
@@ -0,0 +1,317 @@
+/***************************************************************************
+ *  tests/containers/hash_map/test_hash_map.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <iostream>
+
+#include <stxxl.h>
+#include <stxxl/bits/common/seed.h>
+#include <stxxl/bits/common/rand.h>
+
+using stxxl::unsigned_type;
+
+struct rand_pairs
+{
+    stxxl::random_number32& rand_;
+
+    rand_pairs(stxxl::random_number32& rand)
+        : rand_(rand)
+    { }
+
+    std::pair<int, int> operator () ()
+    {
+        int v = (int)rand_();
+        return std::pair<int, int>(v, v);
+    }
+};
+
+struct hash_int
+{
+    size_t operator () (int key) const
+    {
+        // a simple integer hash function
+        return (size_t)(key * 2654435761u);
+    }
+};
+
+struct cmp : public std::less<int>
+{
+    int min_value() const { return std::numeric_limits<int>::min(); }
+    int max_value() const { return std::numeric_limits<int>::max(); }
+};
+
+// forced instantiation
+template class stxxl::unordered_map<int, int, hash_int, cmp, 4* 1024, 4>;
+
+struct structA
+{
+    int x, y;
+
+    structA() { }
+    structA(int _x, int _y) : x(_x), y(_y) { }
+};
+
+struct structB
+{
+    double u, v;
+};
+
+struct hash_structA
+{
+    size_t operator () (const structA& key) const
+    {
+        // a simple integer hash function
+        return (size_t)((key.x + key.y) * 2654435761u);
+    }
+};
+
+struct cmp_structA
+{
+    bool operator () (const structA& a, const structA& b) const
+    {
+        if (a.x == b.x) return a.y < b.y;
+        return a.x < b.x;
+    }
+
+    structA min_value() const
+    {
+        return structA(std::numeric_limits<int>::min(),
+                       std::numeric_limits<int>::min());
+    }
+    structA max_value() const
+    {
+        return structA(std::numeric_limits<int>::max(),
+                       std::numeric_limits<int>::max());
+    }
+};
+
+// forced instantiation of a struct
+template class stxxl::unordered_map<
+    structA, structB, hash_structA, cmp_structA, 4* 1024, 4
+    >;
+
+void basic_test()
+{
+    typedef std::pair<int, int> value_type;
+    const unsigned_type value_size = sizeof(value_type);
+
+    const unsigned_type n_values = 20000;
+    const unsigned_type n_tests = 10000;
+
+    // make sure all changes will be buffered (*)
+    const unsigned_type buffer_size = 5 * n_values * (value_size + sizeof(int*));
+
+    const unsigned_type mem_to_sort = 32 * 1024 * 1024;
+
+    const unsigned_type subblock_raw_size = 4 * 1024;
+    const unsigned_type block_size = 4;
+
+    typedef stxxl::unordered_map<int, int, hash_int, cmp,
+                                 subblock_raw_size, block_size> unordered_map;
+    typedef unordered_map::iterator iterator;
+    typedef unordered_map::const_iterator const_iterator;
+
+    stxxl::stats_data stats_begin;
+
+    unordered_map map;
+    map.max_buffer_size(buffer_size);
+    const unordered_map& cmap = map;
+
+    // generate random values
+    stxxl::random_number32 rand32;
+
+    std::vector<value_type> values1(n_values);
+    std::vector<value_type> values2(n_values);
+    std::vector<value_type> values3(n_values / 2);
+    std::generate(values1.begin(), values1.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+    std::generate(values2.begin(), values2.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+    std::generate(values3.begin(), values3.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+
+    // --- initial import
+    std::cout << "Initial import...";
+    stats_begin = *stxxl::stats::get_instance();
+
+    STXXL_CHECK(map.begin() == map.end());
+    map.insert(values1.begin(), values1.end(), mem_to_sort);
+    STXXL_CHECK(map.begin() != map.end());
+    STXXL_CHECK(map.size() == n_values);
+
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+
+    // (*) all these values are stored in external memory; the remaining
+    // changes will be buffered in internal memory
+
+    // --- insert: new (from values2) and existing (from values1) values, with
+    // --- and without checking
+    std::cout << "Insert...";
+    stats_begin = *stxxl::stats::get_instance();
+
+    for (unsigned_type i = 0; i < n_values / 2; i++) {
+        // new without checking
+        map.insert_oblivious(values2[2 * i]);
+        // new with checking
+        std::pair<iterator, bool> res = map.insert(values2[2 * i + 1]);
+        STXXL_CHECK(res.second && (*(res.first)).first == values2[2 * i + 1].first);
+        // existing without checking
+        map.insert_oblivious(values1[2 * i]);
+        // existing with checking
+        res = map.insert(values1[2 * i + 1]);
+        STXXL_CHECK(!res.second && (*(res.first)).first == values1[2 * i + 1].first);
+    }
+
+    STXXL_CHECK(map.size() == 2 * n_values);
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+
+    // "old" values are stored in external memory, "new" values are stored in
+    // internal memory
+
+    // --- find: existing (from external and internal memory) and non-existing
+    // --- values
+    std::cout << "Find...";
+    stats_begin = *stxxl::stats::get_instance();
+
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    for (unsigned_type i = 0; i < n_tests; i++) {
+        STXXL_CHECK(cmap.find(values1[i].first) != cmap.end());
+        STXXL_CHECK(cmap.find(values2[i].first) != cmap.end());
+        STXXL_CHECK(cmap.find(values3[i].first) == cmap.end());
+    }
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+
+    // --- insert with overwriting
+    std::cout << "Insert with overwriting...";
+    stats_begin = *stxxl::stats::get_instance();
+
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    for (unsigned_type i = 0; i < n_tests; i++) {
+        value_type value1 = values1[i];         // in external memory
+        value1.second++;
+        map.insert_oblivious(value1);
+
+        value_type value2 = values2[i];         // in internal memory
+        value2.second++;
+        map.insert_oblivious(value2);
+    }
+    // now check
+    STXXL_CHECK(map.size() == 2 * n_values);         // nothing added, nothing removed
+    for (unsigned_type i = 0; i < n_tests; i++) {
+        const_iterator it1 = cmap.find(values1[i].first);
+        const_iterator it2 = cmap.find(values2[i].first);
+
+        STXXL_CHECK((*it1).second == values1[i].second + 1);
+        STXXL_CHECK((*it2).second == values2[i].second + 1);
+    }
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+
+    // --- erase: existing and non-existing values, with and without checking
+    std::cout << "Erase...";
+    stats_begin = *stxxl::stats::get_instance();
+
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    std::random_shuffle(values3.begin(), values3.end());
+    for (unsigned_type i = 0; i < n_tests / 2; i++) {        // external
+        // existing without checking
+        map.erase_oblivious(values1[2 * i].first);
+        // existing with checking
+        STXXL_CHECK(map.erase(values1[2 * i + 1].first) == 1);
+    }
+    for (unsigned_type i = 0; i < n_tests / 2; i++) {        // internal
+        // existing without checking
+        map.erase_oblivious(values2[2 * i].first);
+        // existing with checking
+        STXXL_CHECK(map.erase(values2[2 * i + 1].first) == 1);
+        // non-existing without checking
+        map.erase_oblivious(values3[i].first);
+        // (non-existing with checking is not exercised by this test)
+    }
+    STXXL_CHECK(map.size() == 2 * n_values - 2 * n_tests);
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+
+    map.clear();
+    STXXL_CHECK(map.size() == 0);
+
+    // --- find and manipulate values by []-operator
+
+    // make sure there are some values in our unordered_map: externally
+    // [0..n/2) and internally [n/2..n) from values1
+    std::cout << "[ ]-operator...";
+    stats_begin = *stxxl::stats::get_instance();
+
+    map.insert(values1.begin(), values1.begin() + n_values / 2, mem_to_sort);
+    for (unsigned_type i = n_values / 2; i < n_values; i++) {
+        map.insert_oblivious(values1[i]);
+    }
+    // lookup of existing values
+    STXXL_CHECK(map[values1[5].first] == values1[5].second);                               // external
+    STXXL_CHECK(map[values1[n_values / 2 + 5].first] == values1[n_values / 2 + 5].second); // internal
+    // manipulate existing values
+    ++(map[values1[7].first]);
+    ++(map[values1[n_values / 2 + 7].first]);
+    {
+        const_iterator cit1 = cmap.find(values1[7].first);
+        STXXL_CHECK((*cit1).second == (*cit1).first + 1);
+        const_iterator cit2 = cmap.find(values1[n_values / 2 + 7].first);
+        STXXL_CHECK((*cit2).second == (*cit2).first + 1);
+    }
+    // lookup of non-existing values
+    STXXL_CHECK(map[values2[5].first] == unordered_map::mapped_type());
+    // assignment of non-existing values
+    map[values2[7].first] = values2[7].second;
+    {
+        const_iterator cit = cmap.find(values2[7].first);
+        STXXL_CHECK(cit != cmap.end() && (*cit).second == values2[7].second);
+    }
+    STXXL_CHECK(map.size() == n_values + 2);
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+
+    map.clear();
+    STXXL_CHECK(map.size() == 0);
+
+    // --- additional bulk insert test
+    std::cout << "additional bulk-insert...";
+    stats_begin = *stxxl::stats::get_instance();
+
+    map.insert(values1.begin(), values1.begin() + n_values / 2, mem_to_sort);
+    map.insert(values1.begin() + n_values / 2, values1.end(), mem_to_sort);
+    STXXL_CHECK(map.size() == n_values);
+    // lookup some random values
+    std::random_shuffle(values1.begin(), values1.end());
+    for (unsigned_type i = 0; i < n_tests; i++)
+        STXXL_CHECK(cmap.find(values1[i].first) != cmap.end());
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+
+    // --- test equality predicate
+    unordered_map::key_equal key_eq = map.key_eq();
+    STXXL_CHECK(key_eq(42, 42));
+    STXXL_CHECK(!key_eq(42, 6 * 9));
+
+    std::cout << "\nAll tests passed" << std::endl;
+
+    map.buffer_size();
+}
+
+int main()
+{
+    basic_test();
+
+    return 0;
+}
diff --git a/tests/containers/hash_map/test_hash_map_block_cache.cpp b/tests/containers/hash_map/test_hash_map_block_cache.cpp
new file mode 100644
index 0000000..afffa36
--- /dev/null
+++ b/tests/containers/hash_map/test_hash_map_block_cache.cpp
@@ -0,0 +1,155 @@
+/***************************************************************************
+ *  tests/containers/hash_map/test_hash_map_block_cache.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <iostream>
+
+#include <stxxl.h>
+#include <stxxl/bits/common/seed.h>
+#include <stxxl/bits/containers/hash_map/block_cache.h>
+
+bool test_block_cache()
+{
+    typedef std::pair<int, int> value_type;
+
+    const unsigned subblock_raw_size = 1024 * 8; // 8KB subblocks
+    const unsigned block_size = 128;             // 1MB blocks (=128 subblocks)
+
+    const unsigned num_blocks = 64;              // number of blocks to use for this test
+    const unsigned cache_size = 8;               // size of cache in blocks
+
+    typedef stxxl::typed_block<subblock_raw_size, value_type> subblock_type;
+    typedef stxxl::typed_block<block_size* sizeof(subblock_type), subblock_type> block_type;
+
+    const unsigned subblock_size = subblock_type::size;          // size in values
+
+    typedef block_type::bid_type bid_type;
+    typedef std::vector<bid_type> bid_container_type;
+
+    // prepare test: allocate blocks, fill them with values and write to disk
+    bid_container_type bids(num_blocks);
+    stxxl::block_manager* bm = stxxl::block_manager::get_instance();
+    bm->new_blocks(stxxl::striping(), bids.begin(), bids.end());
+
+    block_type* block = new block_type;
+    for (unsigned i_block = 0; i_block < num_blocks; i_block++) {
+        for (unsigned i_subblock = 0; i_subblock < block_size; i_subblock++) {
+            for (unsigned i_value = 0; i_value < subblock_size; i_value++) {
+                int value = i_value + i_subblock * subblock_size + i_block * block_size;
+                (*block)[i_subblock][i_value] = value_type(value, value);
+            }
+        }
+        stxxl::request_ptr req = block->write(bids[i_block]);
+        req->wait();
+    }
+
+    stxxl::random_number32 rand32;
+
+    // create block_cache
+    typedef stxxl::hash_map::block_cache<block_type> cache_type;
+    cache_type cache(cache_size);
+
+    // load random subblocks and check for values
+    int n_runs = cache_size * 10;
+    for (int i_run = 0; i_run < n_runs; i_run++) {
+        int i_block = rand32() % num_blocks;
+        int i_subblock = rand32() % block_size;
+
+        subblock_type* subblock = cache.get_subblock(bids[i_block], i_subblock);
+
+        int expected = i_block * block_size + i_subblock * subblock_size + 1;
+        STXXL_CHECK((*subblock)[1].first == expected);
+    }
+
+    // do the same again but this time with prefetching
+    for (int i_run = 0; i_run < n_runs; i_run++) {
+        int i_block = rand32() % num_blocks;
+        int i_subblock = rand32() % block_size;
+
+        cache.prefetch_block(bids[i_block]);
+        subblock_type* subblock = cache.get_subblock(bids[i_block], i_subblock);
+        int expected = i_block * block_size + i_subblock * subblock_size + 1;
+        STXXL_CHECK((*subblock)[1].first == expected);
+    }
+
+    // load and modify some subblocks; flush cache and check values
+    unsigned myseed = stxxl::get_next_seed();
+    stxxl::set_seed(myseed);
+    for (int i_run = 0; i_run < n_runs; i_run++) {
+        int i_block = rand32() % num_blocks;
+        int i_subblock = rand32() % block_size;
+
+        subblock_type* subblock = cache.get_subblock(bids[i_block], i_subblock);
+
+        STXXL_CHECK(cache.make_dirty(bids[i_block]));
+        (*subblock)[1].first = (*subblock)[1].second + 42;
+    }
+    stxxl::set_seed(myseed);
+    for (int i_run = 0; i_run < n_runs; i_run++) {
+        int i_block = rand32() % num_blocks;
+        int i_subblock = rand32() % block_size;
+        subblock_type* subblock = cache.get_subblock(bids[i_block], i_subblock);
+
+        int expected = i_block * block_size + i_subblock * subblock_size + 1;
+        STXXL_CHECK((*subblock)[1].first == expected + 42);
+    }
+
+    // test retaining
+    cache.clear();
+
+    // not yet cached
+    STXXL_CHECK(cache.retain_block(bids[0]) == false);
+    cache.prefetch_block(bids[0]);
+
+    // cached, should be retained
+    STXXL_CHECK(cache.retain_block(bids[0]) == true);
+    // release again
+    STXXL_CHECK(cache.release_block(bids[0]) == true);
+    // retain-count should be 0, release fails
+    STXXL_CHECK(cache.release_block(bids[0]) == false);
+
+    // cache new block
+    subblock_type* kicked_subblock = cache.get_subblock(bids[1], 0);
+    // load other blocks, so that kicked_subblock, well, gets kicked
+    for (unsigned i = 0; i < cache_size + 5; i++) {
+        cache.prefetch_block(bids[i + 3]);
+    }
+    // load kicked subblock again, should be at a different location
+    STXXL_CHECK(cache.get_subblock(bids[1], 0) != kicked_subblock);
+
+    subblock_type* retained_subblock = cache.get_subblock(bids[1], 0);
+    // now retain subblock
+    STXXL_CHECK(cache.retain_block(bids[1]) == true);
+    for (unsigned i = 0; i < cache_size + 5; i++) {
+        cache.prefetch_block(bids[i + 3]);
+    }
+    // retained_subblock should not have been kicked
+    STXXL_CHECK(cache.get_subblock(bids[1], 0) == retained_subblock);
+    cache.clear();
+
+    // test swapping
+    subblock_type* a_subblock = cache.get_subblock(bids[6], 1);
+    cache_type cache2(cache_size / 2);
+    std::swap(cache, cache2);
+    STXXL_CHECK(cache.size() == cache_size / 2);
+    STXXL_CHECK(cache2.size() == cache_size);
+    STXXL_CHECK(cache2.get_subblock(bids[6], 1) == a_subblock);
+
+    STXXL_MSG("Passed Block-Cache Test");
+
+    return true;
+}
+
+int main()
+{
+    test_block_cache();
+    return 0;
+}
diff --git a/tests/containers/hash_map/test_hash_map_iterators.cpp b/tests/containers/hash_map/test_hash_map_iterators.cpp
new file mode 100644
index 0000000..c23b3f2
--- /dev/null
+++ b/tests/containers/hash_map/test_hash_map_iterators.cpp
@@ -0,0 +1,390 @@
+/***************************************************************************
+ *  tests/containers/hash_map/test_hash_map_iterators.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *  Copyright (C) 2014 Timo Bingmann <tb at panthema.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <iostream>
+
+#include <stxxl.h>
+#include <stxxl/bits/common/seed.h>
+#include <stxxl/bits/common/rand.h>
+#include <stxxl/bits/containers/hash_map/hash_map.h>
+#include <stxxl/bits/compat/hash_map.h>
+
+using stxxl::unsigned_type;
+
+struct rand_pairs
+{
+    stxxl::random_number32& rand_;
+
+    rand_pairs(stxxl::random_number32& rand)
+        : rand_(rand)
+    { }
+
+    std::pair<int, int> operator () ()
+    {
+        int v = rand_();
+        return std::pair<int, int>(v, v);
+    }
+};
+
+struct hash_int
+{
+    size_t operator () (int key) const
+    {
+        // a simple integer hash function
+        return (size_t)(key * 2654435761u);
+    }
+};
+
+struct cmp : public std::less<int>
+{
+    int min_value() const { return std::numeric_limits<int>::min(); }
+    int max_value() const { return std::numeric_limits<int>::max(); }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+void cmp_with_internal_map()
+{
+    typedef std::pair<int, int> value_type;
+    const unsigned_type value_size = sizeof(value_type);
+
+    const unsigned_type n_values = 15000;
+    const unsigned_type n_tests = 7500;
+
+    // make sure all changes will be buffered
+    const unsigned_type buffer_size = 5 * n_values * (value_size + sizeof(int*));
+    const unsigned_type mem_to_sort = 32 * 1024 * 1024;
+
+    const unsigned_type subblock_raw_size = 4 * 1024;
+    const unsigned_type block_size = 4;
+
+    typedef stxxl::hash_map::hash_map<int, int, hash_int, cmp,
+                                      subblock_raw_size, block_size> hash_map;
+    typedef hash_map::const_iterator const_iterator;
+
+    typedef stxxl::compat_hash_map<int, int>::result int_hash_map;
+
+    stxxl::stats_data stats_begin = *stxxl::stats::get_instance();
+
+    hash_map map;
+    map.max_buffer_size(buffer_size);
+    const hash_map& cmap = map;
+    int_hash_map int_map;
+
+    // generate random values
+    stxxl::random_number32 rand32;
+    std::vector<value_type> values1(n_values);
+    std::vector<value_type> values2(n_values);
+    std::vector<value_type> values3(n_values);
+    std::generate(values1.begin(), values1.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+    std::generate(values2.begin(), values2.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+    std::generate(values3.begin(), values3.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+
+    // --- initial import: create a nice mix of externally (values1) and
+    // --- internally (values2) stored values
+    std::cout << "Initial import...";
+
+    map.insert(values1.begin(), values1.end(), mem_to_sort);
+    int_map.insert(values1.begin(), values1.end());
+
+    std::vector<value_type>::iterator val_it = values2.begin();
+    for ( ; val_it != values2.end(); ++val_it) {
+        map.insert_oblivious(*val_it);
+        int_map.insert(*val_it);
+    }
+
+    // --- erase and overwrite some external values
+    std::random_shuffle(values1.begin(), values1.end());
+    val_it = values1.begin();
+    for ( ; val_it != values1.begin() + n_tests; ++val_it) {
+        map.erase_oblivious(val_it->first);
+        int_map.erase(val_it->first);
+    }
+    for ( ; val_it != values1.begin() + 2 * n_tests; ++val_it) {
+        map.insert_oblivious(*val_it);
+        int_map.insert(*val_it);
+    }
+
+    // --- scan and compare with internal memory hash-map
+    std::cout << "Compare with internal-memory map...";
+    STXXL_CHECK(int_map.size() == map.size());
+    const_iterator cit = cmap.begin();
+    for ( ; cit != cmap.end(); ++cit) {
+        int key = (*cit).first;
+        STXXL_CHECK(int_map.find(key) != int_map.end());
+    }
+    std::cout << "passed" << std::endl;
+
+    // --- another bulk insert; sizes must again agree with the reference map
+    std::cout << "Compare with internal-memory map after another bulk-insert...";
+    map.insert(values3.begin(), values3.end(), mem_to_sort);
+    int_map.insert(values3.begin(), values3.end());
+    STXXL_CHECK(int_map.size() == map.size());
+    cit = cmap.begin();
+    for ( ; cit != cmap.end(); ++cit) {
+        int key = (*cit).first;
+        STXXL_CHECK(int_map.find(key) != int_map.end());
+    }
+    std::cout << "passed" << std::endl;
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void basic_iterator_test()
+{
+    typedef std::pair<int, int> value_type;
+    const unsigned_type value_size = sizeof(value_type);
+
+    const unsigned_type n_values = 15000;
+    const unsigned_type n_tests = 7500;
+
+    // make sure all changes will be buffered
+    const unsigned_type buffer_size = 5 * n_values * (value_size + sizeof(int*));
+
+    const unsigned_type mem_to_sort = 32 * 1024 * 1024;
+
+    const unsigned_type subblock_raw_size = 4 * 1024;
+    const unsigned_type block_size = 4;
+
+    typedef stxxl::hash_map::hash_map<int, int, hash_int, cmp,
+                                      subblock_raw_size, block_size> hash_map;
+    typedef hash_map::iterator iterator;
+    typedef hash_map::const_iterator const_iterator;
+
+    stxxl::stats_data stats_begin = *stxxl::stats::get_instance();
+
+    hash_map map;
+    map.max_buffer_size(buffer_size);
+    const hash_map& cmap = map;
+
+    // generate random values
+    stxxl::random_number32 rand32;
+
+    std::vector<value_type> values1(n_values);
+    std::vector<value_type> values2(n_values);
+    std::generate(values1.begin(), values1.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+    std::generate(values2.begin(), values2.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+
+    // --- initial import: create a nice mix of externally (values1) and
+    // --- internally (values2) stored values
+    std::cout << "Initial import...";
+
+    STXXL_CHECK(map.begin() == map.end());
+    map.insert(values1.begin(), values1.end(), mem_to_sort);
+    for (std::vector<value_type>::iterator val_it = values2.begin();
+         val_it != values2.end(); ++val_it)
+        map.insert_oblivious(*val_it);
+    STXXL_CHECK(map.begin() != map.end());
+    STXXL_CHECK(map.size() == 2 * n_values);
+    std::cout << "passed" << std::endl;
+
+    // --- actual testing begins: modify random values via iterator
+    std::cout << "Lookup and modify...";
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    for (unsigned_type i = 0; i < n_tests; ++i) {
+        iterator it1 = map.find(values1[i].first);
+        iterator it2 = map.find(values2[i].first);
+        STXXL_CHECK(it1 != map.end());
+        STXXL_CHECK(it2 != map.end());
+        (*it1).second++;
+        (*it2).second++;
+    }
+    // check again
+    for (unsigned_type i = 0; i < n_tests; ++i) {
+        const_iterator cit1 = cmap.find(values1[i].first);
+        const_iterator cit2 = cmap.find(values2[i].first);
+        STXXL_CHECK(cit1 != map.end());
+        STXXL_CHECK(cit2 != map.end());
+        value_type value1 = *cit1;
+        value_type value2 = *cit2;
+        STXXL_CHECK(value1.second == value1.first + 1);
+        STXXL_CHECK(value2.second == value2.first + 1);
+    }
+    std::cout << "passed" << std::endl;
+
+    // --- scan and modify
+    std::cout << "Scan and modify...";
+    {
+        for (iterator it = map.begin(); it != map.end(); ++it)
+            (*it).second = (*it).first + 1;
+
+        for (const_iterator cit = cmap.begin(); cit != cmap.end(); ++cit) {
+            STXXL_CHECK((*cit).second == (*cit).first + 1);
+        }
+    }
+    std::cout << "passed" << std::endl;
+
+    // --- iterator-value altered by insert_oblivious
+    std::cout << "Iterator-value altered by insert_oblivious...";
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    for (unsigned_type i = 0; i < n_tests; i++) {
+        int key1 = values1[i].first;
+        int key2 = values2[i].first;
+        const_iterator cit1 = cmap.find(key1);
+        STXXL_CHECK(cit1 != cmap.end());
+        const_iterator cit2 = cmap.find(key2);
+        STXXL_CHECK(cit2 != cmap.end());
+
+        map.insert_oblivious(value_type(key1, key1 + 3));
+        map.insert_oblivious(value_type(key2, key2 + 3));
+
+        STXXL_CHECK((*cit1).second == key1 + 3);
+        STXXL_CHECK((*cit2).second == key2 + 3);
+    }
+    std::cout << "passed" << std::endl;
+
+    // --- iterator-value altered by other iterator
+    std::cout << "Iterator-value altered by other iterator...";
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    for (unsigned_type i = 0; i < n_tests; i++) {
+        const_iterator cit1 = cmap.find(values1[i].first);
+        STXXL_CHECK(cit1 != cmap.end());
+        const_iterator cit2 = cmap.find(values2[i].first);
+        STXXL_CHECK(cit2 != cmap.end());
+        iterator it1 = map.find(values1[i].first);
+        STXXL_CHECK(it1 != map.end());
+        iterator it2 = map.find(values2[i].first);
+        STXXL_CHECK(it2 != map.end());
+
+        (*it1).second = (*it1).first + 5;
+        (*it2).second = (*it2).first + 5;
+        STXXL_CHECK((*cit1).second == (*cit1).first + 5);
+        STXXL_CHECK((*cit2).second == (*cit2).first + 5);
+    }
+    std::cout << "passed" << std::endl;
+
+    // --- erase by iterator
+    std::cout << "Erase by iterator...";
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    for (unsigned_type i = 0; i < n_tests; i++) {
+        const_iterator cit1 = cmap.find(values1[i].first);
+        STXXL_CHECK(cit1 != cmap.end());
+        const_iterator cit2 = cmap.find(values2[i].first);
+        STXXL_CHECK(cit2 != cmap.end());
+        iterator it1 = map.find(values1[i].first);
+        STXXL_CHECK(it1 != map.end());
+        iterator it2 = map.find(values2[i].first);
+        STXXL_CHECK(it2 != map.end());
+
+        map.erase(it1);
+        map.erase(it2);
+        STXXL_CHECK(cit1 == cmap.end());
+        STXXL_CHECK(cit2 == cmap.end());
+    }
+    std::cout << "passed" << std::endl;
+
+    // --- erase by value (key)
+    std::cout << "Erase by key...";
+    for (unsigned_type i = 0; i < n_tests; i++) {
+        const_iterator cit1 = cmap.find(values1[i + n_tests].first);
+        STXXL_CHECK(cit1 != cmap.end());
+        const_iterator cit2 = cmap.find(values2[i + n_tests].first);
+        STXXL_CHECK(cit2 != cmap.end());
+
+        map.erase_oblivious(values1[i + n_tests].first);
+        map.erase_oblivious(values2[i + n_tests].first);
+        STXXL_CHECK(cit1 == cmap.end());
+        STXXL_CHECK(cit2 == cmap.end());
+    }
+    std::cout << "passed" << std::endl;
+
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void more_iterator_test()
+{
+    typedef std::pair<int, int> value_type;
+    const unsigned_type value_size = sizeof(value_type);
+
+    const unsigned_type n_values = 15000;
+
+    // make sure all changes will be buffered
+    const unsigned_type buffer_size = 5 * n_values * (value_size + sizeof(int*));
+    const unsigned_type mem_to_sort = 32 * 1024 * 1024;
+
+    const unsigned_type subblock_raw_size = 4 * 1024;
+    const unsigned_type block_size = 4;
+
+    typedef stxxl::hash_map::hash_map<int, int, hash_int, cmp,
+                                      subblock_raw_size, block_size> hash_map;
+    typedef hash_map::const_iterator const_iterator;
+
+    stxxl::stats_data stats_begin = *stxxl::stats::get_instance();
+
+    hash_map map;
+    map.max_buffer_size(buffer_size);
+    const hash_map& cmap = map;
+
+    // generate random values
+    stxxl::random_number32 rand32;
+    std::vector<value_type> values1(n_values);
+    std::vector<value_type> values2(n_values);
+    std::generate(values1.begin(), values1.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+    std::generate(values2.begin(), values2.end(), rand_pairs(rand32) _STXXL_FORCE_SEQUENTIAL);
+
+    // --- initial import
+    map.insert(values1.begin(), values1.end(), mem_to_sort);
+    for (std::vector<value_type>::iterator val_it = values2.begin();
+         val_it != values2.end(); ++val_it)
+        map.insert_oblivious(*val_it);
+
+    // --- store some iterators, rebuild and check
+    std::cout << "Rebuild test...";
+    std::random_shuffle(values1.begin(), values1.end());
+    std::random_shuffle(values2.begin(), values2.end());
+    {
+        const_iterator cit1 = cmap.find(values1[17].first);
+        const_iterator cit2 = cmap.find(values2[19].first);
+        *cit1;
+        *cit2;
+        map.rehash();
+        STXXL_CHECK(map.size() == 2 * n_values);
+        STXXL_CHECK((*cit1).first == values1[17].first);
+        STXXL_CHECK((*cit2).first == values2[19].first);
+    }
+    std::cout << "passed" << std::endl;
+
+    // --- unusual cases while scanning
+    std::cout << "Another scan-test...";
+    {
+        const_iterator cit1 = cmap.find(values1[n_values / 2].first);
+        const_iterator cit2 = cit1;
+        ++cit1;
+        int key1 = (*cit1).first;
+        ++cit1;
+        int key2 = (*cit1).first;
+        map.erase_oblivious(key1);
+        map.insert_oblivious(value_type(key2, key2 + 2));
+
+        STXXL_CHECK((*cit1).second == key2 + 2);
+        ++cit2;
+        STXXL_CHECK(cit1 == cit2);
+    }
+    std::cout << "passed" << std::endl;
+
+    STXXL_MSG(stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+int main()
+{
+    cmp_with_internal_map();
+    basic_iterator_test();
+    more_iterator_test();
+    return 0;
+}
diff --git a/tests/containers/hash_map/test_hash_map_reader_writer.cpp b/tests/containers/hash_map/test_hash_map_reader_writer.cpp
new file mode 100644
index 0000000..e046c14
--- /dev/null
+++ b/tests/containers/hash_map/test_hash_map_reader_writer.cpp
@@ -0,0 +1,176 @@
+/***************************************************************************
+ *  tests/containers/hash_map/test_hash_map_reader_writer.cpp
+ *
+ *  Part of the STXXL. See http://stxxl.sourceforge.net
+ *
+ *  Copyright (C) 2007 Markus Westphal <marwes at users.sourceforge.net>
+ *
+ *  Distributed under the Boost Software License, Version 1.0.
+ *  (See accompanying file LICENSE_1_0.txt or copy at
+ *  http://www.boost.org/LICENSE_1_0.txt)
+ **************************************************************************/
+
+#include <iostream>
+
+#include <stxxl.h>
+#include <stxxl/bits/common/seed.h>
+#include <stxxl/bits/containers/hash_map/util.h>
+
+void reader_writer_test()
+{
+    typedef std::pair<unsigned, unsigned> value_type;
+
+    const unsigned subblock_raw_size = 1024 * 8; // 8KB subblocks
+    const unsigned block_size = 128;             // 1MB blocks (=128 subblocks)
+
+    const unsigned n_blocks = 64;                // number of blocks to use for this test
+    const unsigned cache_size = 8;               // size of cache in blocks
+
+    const unsigned buffer_size = 4;              // write buffer size in blocks
+
+    typedef stxxl::typed_block<subblock_raw_size, value_type> subblock_type;
+    typedef stxxl::typed_block<block_size* sizeof(subblock_type), subblock_type> block_type;
+
+    const unsigned subblock_size = subblock_type::size;  // size in values
+
+    typedef block_type::bid_type bid_type;
+    typedef std::vector<bid_type> bid_container_type;
+    typedef bid_container_type::iterator bid_iterator_type;
+
+    typedef stxxl::hash_map::block_cache<block_type> cache_type;
+
+    typedef stxxl::hash_map::buffered_writer<block_type, bid_container_type> writer_type;
+    typedef stxxl::hash_map::buffered_reader<cache_type, bid_iterator_type> reader_type;
+
+    bid_container_type bids;
+    cache_type cache(cache_size);
+
+    // plain writing
+    {
+        writer_type writer(&bids, buffer_size, buffer_size / 2);
+        unsigned i = 0;
+        for ( ; i < n_blocks * block_size * subblock_size; ++i)
+            writer.append(value_type(i, i));
+        writer.flush();
+
+        STXXL_CHECK(bids.size() >= n_blocks);
+
+        block_type* block = new block_type;
+        i = 0;
+        for (unsigned i_block = 0; i_block < n_blocks; i_block++) {
+            stxxl::request_ptr req = block->read(bids[i_block]);
+            req->wait();
+
+            for (unsigned inner = 0; inner < block_size * subblock_size; ++inner) {
+                STXXL_CHECK((*block)[inner / subblock_size][inner % subblock_size].first == i);
+                i++;
+            }
+        }
+        delete block;
+    }
+
+    // reading with/without prefetching
+    {
+        // last parameter disables prefetching
+        reader_type reader(bids.begin(), bids.end(), cache, 0, false);
+        for (unsigned i = 0; i < n_blocks * block_size * subblock_size; ++i) {
+            STXXL_CHECK(reader.const_value().first == i);
+            ++reader;
+        }
+
+        // prefetching enabled by default
+        reader_type reader2(bids.begin(), bids.end(), cache);
+        for (unsigned i = 0; i < n_blocks * block_size * subblock_size; ++i) {
+            STXXL_CHECK(reader2.const_value().first == i);
+            ++reader2;
+        }
+    }
+
+    // reading with skipping
+    {
+        // disable prefetching
+        reader_type reader(bids.begin(), bids.end(), cache, 0, false);
+
+        // I: first subblock
+        reader.skip_to(bids.begin() + 10, 0);
+        unsigned expected = block_size * subblock_size * 10 + subblock_size * 0;
+        STXXL_CHECK(reader.const_value().first == expected);
+
+        // II: subblock in the middle (same block)
+        reader.skip_to(bids.begin() + 10, 2);
+        expected = block_size * subblock_size * 10 + subblock_size * 2;
+        STXXL_CHECK(reader.const_value().first == expected);
+
+        // III: subblock in the middle (another block)
+        reader.skip_to(bids.begin() + 13, 1);
+        expected = block_size * subblock_size * 13 + subblock_size * 1;
+        STXXL_CHECK(reader.const_value().first == expected);
+    }
+
+    // reading with modifying access
+    {
+        reader_type reader(bids.begin(), bids.end(), cache);
+        for (unsigned i = 0; i < n_blocks * block_size * subblock_size; ++i) {
+            reader.value().second = reader.const_value().first + 1;
+            ++reader;
+        }
+
+        reader_type reader2(bids.begin(), bids.end(), cache);
+        for (unsigned i = 0; i < n_blocks * block_size * subblock_size; ++i) {
+            STXXL_CHECK(reader2.const_value().second == reader2.const_value().first + 1);
+            ++reader2;
+        }
+
+        cache.flush();
+        block_type* block = new block_type;
+        unsigned i = 0;
+        for (unsigned i_block = 0; i_block < n_blocks; i_block++) {
+            stxxl::request_ptr req = block->read(bids[i_block]);
+            req->wait();
+
+            for (unsigned inner = 0; inner < block_size * subblock_size; ++inner) {
+                STXXL_CHECK((*block)[inner / subblock_size][inner % subblock_size].first == i);
+                STXXL_CHECK((*block)[inner / subblock_size][inner % subblock_size].second == i + 1);
+                i++;
+            }
+        }
+        delete block;
+    }
+
+    //cache.dump_cache();
+
+    cache.clear();
+
+    // finishing subblocks: skip second half of each subblock
+    {
+        writer_type writer(&bids, buffer_size, buffer_size / 2);
+        unsigned i = 0;
+        for (unsigned outer = 0; outer < n_blocks * block_size; ++outer) {
+            for (unsigned inner = 0; inner < subblock_size / 2; ++inner) {
+                writer.append(value_type(i, i));
+                ++i;
+            }
+            writer.finish_subblock();
+        }
+        writer.flush();
+
+        reader_type reader(bids.begin(), bids.end(), cache);
+        i = 0;
+        for (unsigned outer = 0; outer < n_blocks * block_size; ++outer) {
+            for (unsigned inner = 0; inner < subblock_size / 2; ++inner) {
+                STXXL_CHECK(reader.const_value().first == i);
+                ++i;
+                ++reader;
+            }
+            reader.next_subblock();
+        }
+    }
+
+    STXXL_MSG("Passed Reader-Writer Test");
+}
+
+int main()
+{
+    reader_writer_test();
+    return 0;
+}
diff --git a/tests/containers/test_ext_merger.cpp b/tests/containers/test_ext_merger.cpp
index fd55bab..8807911 100644
--- a/tests/containers/test_ext_merger.cpp
+++ b/tests/containers/test_ext_merger.cpp
@@ -18,7 +18,6 @@
 typedef int my_type;
 typedef stxxl::typed_block<4096, my_type> block_type;
 
-
 struct dummy_merger
 {
     int& cnt;
diff --git a/tests/containers/test_ext_merger2.cpp b/tests/containers/test_ext_merger2.cpp
index 5ce0ebc..93c7162 100644
--- a/tests/containers/test_ext_merger2.cpp
+++ b/tests/containers/test_ext_merger2.cpp
@@ -17,7 +17,6 @@
 typedef int my_type;
 typedef stxxl::typed_block<4096, my_type> block_type;
 
-
 struct dummy_merger
 {
     int current, delta;
@@ -117,7 +116,11 @@ int main()
         merger.multi_merge(output.begin(), output.begin());
 
         while (merger.size() > 0) {
-            stxxl::uint64 l = std::min<stxxl::uint64>(merger.size(), output.size());
+            stxxl::unsigned_type l =
+                std::min<stxxl::unsigned_type>(
+                    (stxxl::unsigned_type)merger.size(), output.size()
+                    );
+
             merger.multi_merge(output.begin(), output.begin() + l);
             STXXL_CHECK(stxxl::is_sorted(output.begin(), output.begin() + l));
             STXXL_MSG("merged " << l << " elements: (" << *output.begin() << ", ..., " << *(output.begin() + l - 1) << ")");
diff --git a/tests/containers/test_iterators.cpp b/tests/containers/test_iterators.cpp
index 747b3ea..736dc3e 100644
--- a/tests/containers/test_iterators.cpp
+++ b/tests/containers/test_iterators.cpp
@@ -17,7 +17,6 @@
 #include <vector>
 #include <stxxl.h>
 
-
 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100)
 
 template <typename T>
@@ -348,7 +347,6 @@ void test_random_access_reverse(svt& sv)
     test_inc_dec_random(xsvri);
 }
 
-
 typedef float key_type;
 typedef double data_type;
 
@@ -364,7 +362,6 @@ struct cmp : public std::less<key_type>
     }
 };
 
-
 template <>
 struct modify<std::pair<const key_type, data_type> >
 {
diff --git a/tests/containers/test_many_stacks.cpp b/tests/containers/test_many_stacks.cpp
index e3376de..8746a38 100644
--- a/tests/containers/test_many_stacks.cpp
+++ b/tests/containers/test_many_stacks.cpp
@@ -16,7 +16,6 @@
 //! with \c stxxl::grow_shrink_stack implementation, \b four blocks per page,
 //! block size \b 4096 bytes
 
-
 #include <stxxl/stack>
 
 // forced instantiation
diff --git a/tests/containers/test_map.cpp b/tests/containers/test_map.cpp
index ba73019..71d6c33 100644
--- a/tests/containers/test_map.cpp
+++ b/tests/containers/test_map.cpp
@@ -53,7 +53,7 @@ int main(int argc, char** argv)
     for (int mult = 1; mult < max_mult; mult *= 2)
     {
         stats_begin = *stxxl::stats::get_instance();
-        const unsigned el = mult * (CACHE_ELEMENTS / 8);
+        const size_t el = mult * (CACHE_ELEMENTS / 8);
         STXXL_MSG("Elements to insert " << el << " volume =" <<
                   (el * (sizeof(key_type) + sizeof(data_type))) / 1024 << " KiB");
 
@@ -77,19 +77,19 @@ int main(int argc, char** argv)
 
         stats_begin = *stxxl::stats::get_instance();
         STXXL_MSG("Doing search");
-        unsigned queries = el / 16;
+        size_t queries = el / 16;
         const map_type& ConstMap = Map;
         stxxl::random_number32 myrandom;
         for (unsigned i = 0; i < queries; ++i)
         {
-            key_type key = myrandom() % el;
+            key_type key = (key_type)(myrandom() % el);
             map_type::const_iterator result = ConstMap.find(key);
             STXXL_CHECK((*result).second == key + 1);
             STXXL_CHECK(result->second == key + 1);
         }
         stats_elapsed = stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
         double reads = double(stats_elapsed.get_reads()) / logel;
-        double readsperq = double(stats_elapsed.get_reads()) / queries;
+        double readsperq = double(stats_elapsed.get_reads()) / (double)queries;
         STXXL_MSG("reads/logel " << reads << " readsperq " << readsperq);
         STXXL_MSG(stats_elapsed);
 
diff --git a/tests/containers/test_map_random.cpp b/tests/containers/test_map_random.cpp
index 1660f32..f22c909 100644
--- a/tests/containers/test_map_random.cpp
+++ b/tests/containers/test_map_random.cpp
@@ -20,7 +20,6 @@
 #include <stxxl/map>
 #include "map_test_handlers.h"
 
-
 typedef int key_type;
 typedef int data_type;
 
@@ -51,7 +50,6 @@ typedef stxxl::map<key_type, data_type, cmp2,
 #define PERCENT_FIND 100
 #define PERCENT_ITERATOR 100
 
-
 //#define MAX_KEY 1000
 #define MAX_KEY 10000
 
@@ -197,7 +195,6 @@ int main(int argc, char* argv[])
             if (xxliter != xxlmap.end())
                 xxlmap.erase(xxliter);
 
-
             STXXL_CHECK(stxxl::not_there(stdmap, key));
             STXXL_CHECK(stxxl::not_there(xxlmap, key));
         }
@@ -225,7 +222,6 @@ int main(int argc, char* argv[])
             if (lower > upper)
                 std::swap(lower, upper);
 
-
             vector_type v2(upper - lower);
             for (unsigned j = 0; j < (unsigned)(upper - lower); j++)
             {
diff --git a/tests/containers/test_migr_stack.cpp b/tests/containers/test_migr_stack.cpp
index 39d7d8d..bf02529 100644
--- a/tests/containers/test_migr_stack.cpp
+++ b/tests/containers/test_migr_stack.cpp
@@ -21,20 +21,20 @@
 
 // forced instantiation
 const unsigned critical_size = 8 * 4096;
-template class stxxl::STACK_GENERATOR<int, stxxl::migrating, stxxl::normal, 4, 4096, std::stack<int>, critical_size>;
+template class stxxl::STACK_GENERATOR<size_t, stxxl::migrating, stxxl::normal, 4, 4096, std::stack<size_t>, critical_size>;
 
 int main()
 {
-    typedef stxxl::STACK_GENERATOR<int, stxxl::migrating, stxxl::normal, 4, 4096, std::stack<int>, critical_size>::result migrating_stack_type;
+    typedef stxxl::STACK_GENERATOR<size_t, stxxl::migrating, stxxl::normal, 4, 4096, std::stack<size_t>, critical_size>::result migrating_stack_type;
 
     STXXL_MSG("Starting test.");
 
     migrating_stack_type my_stack;
-    int test_size = 1 * 1024 * 1024 / sizeof(int), i;
+    size_t test_size = 1 * 1024 * 1024 / sizeof(int);
 
     STXXL_MSG("Filling stack.");
 
-    for (i = 0; i < test_size; i++)
+    for (size_t i = 0; i < test_size; i++)
     {
         my_stack.push(i);
         STXXL_CHECK(my_stack.top() == i);
@@ -50,8 +50,9 @@ int main()
 
     STXXL_MSG("Removing elements from " <<
               (my_stack.external() ? "external" : "internal") << " stack");
-    for (i = test_size - 1; i >= 0; i--)
+    for (size_t i = test_size; i > 0; )
     {
+        --i;
         STXXL_CHECK(my_stack.top() == i);
         STXXL_CHECK(my_stack.size() == i + 1);
         my_stack.pop();
diff --git a/tests/containers/test_sorter.cpp b/tests/containers/test_sorter.cpp
index 8581b63..50682be 100644
--- a/tests/containers/test_sorter.cpp
+++ b/tests/containers/test_sorter.cpp
@@ -22,19 +22,20 @@ struct my_type
 {
     typedef unsigned key_type;
 
-    key_type _key;
-    char _data[RECORD_SIZE - sizeof(key_type)];
+    key_type m_key;
+    char m_data[RECORD_SIZE - sizeof(key_type)];
+
     key_type key() const
     {
-        return _key;
+        return m_key;
     }
 
     my_type() { }
-    my_type(key_type __key)
-        : _key(__key)
+    my_type(key_type k)
+        : m_key(k)
     {
 #if STXXL_WITH_VALGRIND
-        memset(_data, 0, sizeof(_data));
+        memset(m_data, 0, sizeof(m_data));
 #endif
     }
 
@@ -52,7 +53,7 @@ struct my_type
 
 std::ostream& operator << (std::ostream& o, const my_type& obj)
 {
-    o << obj._key;
+    o << obj.m_key;
     return o;
 }
 
diff --git a/tests/containers/test_stack.cpp b/tests/containers/test_stack.cpp
index 6f049bb..54160de 100644
--- a/tests/containers/test_stack.cpp
+++ b/tests/containers/test_stack.cpp
@@ -20,11 +20,10 @@
 #include <stxxl/stack>
 
 // forced instantiation
-template class stxxl::STACK_GENERATOR<int, stxxl::external, stxxl::normal, 4, 4096>;
-template class stxxl::STACK_GENERATOR<int, stxxl::migrating, stxxl::normal, 4, 4096>;
-template class stxxl::STACK_GENERATOR<int, stxxl::external, stxxl::grow_shrink, 4, 4096>;
-template class stxxl::STACK_GENERATOR<int, stxxl::external, stxxl::grow_shrink2, 1, 4096>;
-
+template class stxxl::STACK_GENERATOR<size_t, stxxl::external, stxxl::normal, 4, 4096>;
+template class stxxl::STACK_GENERATOR<size_t, stxxl::migrating, stxxl::normal, 4, 4096>;
+template class stxxl::STACK_GENERATOR<size_t, stxxl::external, stxxl::grow_shrink, 4, 4096>;
+template class stxxl::STACK_GENERATOR<size_t, stxxl::external, stxxl::grow_shrink2, 1, 4096>;
 
 template <typename stack_type>
 void test_lvalue_correctness(stack_type& stack, int a, int b)
@@ -42,35 +41,33 @@ void test_lvalue_correctness(stack_type& stack, int a, int b)
         stack.push(i);
     for (i = 0; i < b; ++i)
         stack.pop();
-    if ((stack.top() != int(0xbeeff00d))) {
+    if ((stack.top() != (size_t)(0xbeeff00d))) {
         STXXL_ERRMSG("STACK MISMATCH AFTER top() LVALUE MODIFICATION (0x" << std::hex << stack.top() << " != 0xbeeff00d)");
-        STXXL_CHECK(stack.top() == int(0xbeeff00d));
+        STXXL_CHECK(stack.top() == (size_t)(0xbeeff00d));
     }
     for (i = 0; i < a; ++i)
         stack.pop();
 }
 
-
 template <typename stack_type>
-void simple_test(stack_type& my_stack, int test_size)
+void simple_test(stack_type& my_stack, size_t test_size)
 {
-    int i;
-
-    for (i = 0; i < test_size; i++)
+    for (size_t i = 0; i < test_size; i++)
     {
         my_stack.push(i);
         STXXL_CHECK(my_stack.top() == i);
         STXXL_CHECK(my_stack.size() == i + 1);
     }
 
-    for (i = test_size - 1; i >= 0; i--)
+    for (size_t i = test_size; i > 0; )
     {
+        --i;
         STXXL_CHECK(my_stack.top() == i);
         my_stack.pop();
         STXXL_CHECK(my_stack.size() == i);
     }
 
-    for (i = 0; i < test_size; i++)
+    for (size_t i = 0; i < test_size; i++)
     {
         my_stack.push(i);
         STXXL_CHECK(my_stack.top() == i);
@@ -82,26 +79,28 @@ void simple_test(stack_type& my_stack, int test_size)
     std::swap(s2, my_stack);
     std::swap(s2, my_stack);
 
-    for (i = test_size - 1; i >= 0; i--)
+    for (size_t i = test_size; i > 0; )
     {
+        --i;
         STXXL_CHECK(my_stack.top() == i);
         my_stack.pop();
         STXXL_CHECK(my_stack.size() == i);
     }
 
-    std::stack<int> int_stack;
+    std::stack<size_t> int_stack;
 
-    for (i = 0; i < test_size; i++)
+    for (size_t i = 0; i < test_size; i++)
     {
         int_stack.push(i);
         STXXL_CHECK(int_stack.top() == i);
-        STXXL_CHECK(int(int_stack.size()) == i + 1);
+        STXXL_CHECK(int_stack.size() == i + 1);
     }
 
     stack_type my_stack1(int_stack);
 
-    for (i = test_size - 1; i >= 0; i--)
+    for (size_t i = test_size; i > 0; )
     {
+        --i;
         STXXL_CHECK(my_stack1.top() == i);
         my_stack1.pop();
         STXXL_CHECK(my_stack1.size() == i);
@@ -114,10 +113,10 @@ void simple_test(stack_type& my_stack, int test_size)
 
 int main(int argc, char* argv[])
 {
-    typedef stxxl::STACK_GENERATOR<int, stxxl::external, stxxl::normal, 4, 4096>::result ext_normal_stack_type;
-    typedef stxxl::STACK_GENERATOR<int, stxxl::migrating, stxxl::normal, 4, 4096>::result ext_migrating_stack_type;
-    typedef stxxl::STACK_GENERATOR<int, stxxl::external, stxxl::grow_shrink, 4, 4096>::result ext_stack_type;
-    typedef stxxl::STACK_GENERATOR<int, stxxl::external, stxxl::grow_shrink2, 1, 4096>::result ext_stack_type2;
+    typedef stxxl::STACK_GENERATOR<size_t, stxxl::external, stxxl::normal, 4, 4096>::result ext_normal_stack_type;
+    typedef stxxl::STACK_GENERATOR<size_t, stxxl::migrating, stxxl::normal, 4, 4096>::result ext_migrating_stack_type;
+    typedef stxxl::STACK_GENERATOR<size_t, stxxl::external, stxxl::grow_shrink, 4, 4096>::result ext_stack_type;
+    typedef stxxl::STACK_GENERATOR<size_t, stxxl::external, stxxl::grow_shrink2, 1, 4096>::result ext_stack_type2;
 
     if (argc < 2)
     {
@@ -141,23 +140,24 @@ int main(int argc, char* argv[])
         stxxl::read_write_pool<ext_stack_type2::block_type> pool(10, 10);
         // create a stack that does not prefetch (level of prefetch aggressiveness 0)
         ext_stack_type2 my_stack(pool, 0);
-        int test_size = atoi(argv[1]) * 4 * 4096 / sizeof(int), i;
+        size_t test_size = atoi(argv[1]) * 4 * 4096 / sizeof(int);
 
-        for (i = 0; i < test_size; i++)
+        for (size_t i = 0; i < test_size; i++)
         {
             my_stack.push(i);
             STXXL_CHECK(my_stack.top() == i);
             STXXL_CHECK(my_stack.size() == i + 1);
         }
         my_stack.set_prefetch_aggr(10);
-        for (i = test_size - 1; i >= 0; i--)
+        for (size_t i = test_size; i > 0; )
         {
+            --i;
             STXXL_CHECK(my_stack.top() == i);
             my_stack.pop();
             STXXL_CHECK(my_stack.size() == i);
         }
 
-        for (i = 0; i < test_size; i++)
+        for (size_t i = 0; i < test_size; i++)
         {
             my_stack.push(i);
             STXXL_CHECK(my_stack.top() == i);
@@ -169,8 +169,9 @@ int main(int argc, char* argv[])
         std::swap(s2, my_stack);
         std::swap(s2, my_stack);
 
-        for (i = test_size - 1; i >= 0; i--)
+        for (size_t i = test_size; i > 0; )
         {
+            --i;
             STXXL_CHECK(my_stack.top() == i);
             my_stack.pop();
             STXXL_CHECK(my_stack.size() == i);
diff --git a/tests/containers/test_vector.cpp b/tests/containers/test_vector.cpp
index b75147e..eebcd83 100644
--- a/tests/containers/test_vector.cpp
+++ b/tests/containers/test_vector.cpp
@@ -21,7 +21,6 @@
 #include <stxxl/vector>
 #include <stxxl/scan>
 
-
 struct element  // 24 bytes, not a power of 2 intentionally
 {
     stxxl::int64 key;
@@ -102,7 +101,6 @@ void test_vector1()
         STXXL_CHECK(v[i].key == stxxl::int64(i + offset));
     }
 
-
     // fill the vector with random numbers
     stxxl::generate(v.begin(), v.end(), stxxl::random_number32(), 4);
     v.flush();
diff --git a/tests/containers/test_vector_buf.cpp b/tests/containers/test_vector_buf.cpp
index 8032136..efd6502 100644
--- a/tests/containers/test_vector_buf.cpp
+++ b/tests/containers/test_vector_buf.cpp
@@ -57,8 +57,8 @@ class MyStream
 public:
     typedef ValueType value_type;
 
-    MyStream() :
-        i(0)
+    MyStream()
+        : i(0)
     { }
 
     value_type operator * () const
@@ -121,6 +121,20 @@ void test_vector_buf(uint64 size)
 
         check_vector(vec);
     }
+    {   // fill empty vector using vector_bufwriter
+        stxxl::scoped_print_timer tm("empty vector_bufwriter");
+
+        vector_type vec;
+
+        typename vector_type::bufwriter_type writer(vec);
+
+        for (uint64 i = 0; i < size; ++i)
+            writer << ValueType(i);
+
+        writer.finish();
+
+        check_vector(vec);
+    }
 
     vector_type vec(size);
 
diff --git a/tests/containers/test_vector_export.cpp b/tests/containers/test_vector_export.cpp
index eb307ff..d8c0a96 100644
--- a/tests/containers/test_vector_export.cpp
+++ b/tests/containers/test_vector_export.cpp
@@ -20,7 +20,6 @@
 
 typedef stxxl::int64 int64;
 
-
 int main()
 {
     // use non-randomized striping to avoid side effects on random generator
diff --git a/tests/containers/test_vector_sizes.cpp b/tests/containers/test_vector_sizes.cpp
index b0b3a64..f65fb5f 100644
--- a/tests/containers/test_vector_sizes.cpp
+++ b/tests/containers/test_vector_sizes.cpp
@@ -13,7 +13,6 @@
 #include <stxxl/io>
 #include <stxxl/vector>
 
-
 typedef int my_type;
 typedef stxxl::VECTOR_GENERATOR<my_type>::result vector_type;
 typedef vector_type::block_type block_type;
diff --git a/tests/io/CMakeLists.txt b/tests/io/CMakeLists.txt
index bbe5c35..bd77dc5 100644
--- a/tests/io/CMakeLists.txt
+++ b/tests/io/CMakeLists.txt
@@ -17,7 +17,7 @@ stxxl_build_test(test_io_sizes)
 stxxl_test(test_io "${STXXL_TMPDIR}")
 
 stxxl_test(test_cancel syscall "${STXXL_TMPDIR}/testdisk1")
-# FIXME: clean up after fileperblock_syscall
+# TODO: clean up after fileperblock_syscall
 stxxl_test(test_cancel fileperblock_syscall "${STXXL_TMPDIR}/testdisk1")
 if(STXXL_HAVE_MMAP_FILE)
   stxxl_test(test_cancel mmap "${STXXL_TMPDIR}/testdisk1")
@@ -25,18 +25,24 @@ if(STXXL_HAVE_MMAP_FILE)
   #-tb: fails randomly (due to I/O cancelation order)
   #stxxl_test(test_cancel simdisk "${STXXL_TMPDIR}/testdisk1")
 endif(STXXL_HAVE_MMAP_FILE)
+if(STXXL_HAVE_LINUXAIO_FILE)
+  stxxl_test(test_cancel linuxaio "${STXXL_TMPDIR}/testdisk1")
+endif(STXXL_HAVE_LINUXAIO_FILE)
 if(USE_BOOST)
   stxxl_test(test_cancel boostfd "${STXXL_TMPDIR}/testdisk1")
   stxxl_test(test_cancel fileperblock_boostfd "${STXXL_TMPDIR}/testdisk1")
 endif(USE_BOOST)
 stxxl_test(test_cancel memory "${STXXL_TMPDIR}/testdisk1")
-# FIXME: asserts!
+# TODO: asserts!
 #stxxl_test(test_cancel wbtl "${STXXL_TMPDIR}/testdisk1")
 
 stxxl_test(test_io_sizes syscall "${STXXL_TMPDIR}/testdisk1" 1073741824)
 if(STXXL_HAVE_MMAP_FILE)
   stxxl_test(test_io_sizes mmap "${STXXL_TMPDIR}/testdisk1" 1073741824)
 endif(STXXL_HAVE_MMAP_FILE)
+if(STXXL_HAVE_LINUXAIO_FILE)
+  stxxl_test(test_io_sizes linuxaio "${STXXL_TMPDIR}/testdisk1" 1073741824)
+endif(STXXL_HAVE_LINUXAIO_FILE)
 if(USE_BOOST)
   stxxl_test(test_io_sizes boostfd "${STXXL_TMPDIR}/testdisk1" 1073741824)
 endif(USE_BOOST)
diff --git a/tests/io/test_cancel.cpp b/tests/io/test_cancel.cpp
index 4f9fa56..6764dde 100644
--- a/tests/io/test_cancel.cpp
+++ b/tests/io/test_cancel.cpp
@@ -36,12 +36,15 @@ int main(int argc, char** argv)
         return -1;
     }
 
-    const stxxl::uint64 size = 64 * 1024 * 1024, num_blocks = 16;
+    const stxxl::uint64 size = 16 * 1024 * 1024, num_blocks = 16;
     char* buffer = (char*)stxxl::aligned_alloc<4096>(size);
     memset(buffer, 0, size);
 
     stxxl::compat_unique_ptr<stxxl::file>::result file(
-        stxxl::create_file(argv[1], argv[2], stxxl::file::CREAT | stxxl::file::RDWR | stxxl::file::DIRECT));
+        stxxl::create_file(
+            argv[1], argv[2],
+            stxxl::file::CREAT | stxxl::file::RDWR | stxxl::file::DIRECT)
+        );
 
     file->set_size(num_blocks * size);
     stxxl::request_ptr req[num_blocks];
diff --git a/tests/io/test_io.cpp b/tests/io/test_io.cpp
index 90eb016..edad472 100644
--- a/tests/io/test_io.cpp
+++ b/tests/io/test_io.cpp
@@ -22,7 +22,6 @@
 
 using stxxl::file;
 
-
 struct my_handler
 {
     void operator () (stxxl::request* ptr)
diff --git a/tests/io/test_io_sizes.cpp b/tests/io/test_io_sizes.cpp
index a28eefd..38d1fd1 100644
--- a/tests/io/test_io_sizes.cpp
+++ b/tests/io/test_io_sizes.cpp
@@ -17,7 +17,6 @@
 //! \example io/test_io_sizes.cpp
 //! This tests the maximum chunk size that a file type can handle with a single request.
 
-
 int main(int argc, char** argv)
 {
     if (argc < 4)
@@ -26,20 +25,24 @@ int main(int argc, char** argv)
         return -1;
     }
 
+    using stxxl::unsigned_type;
     using stxxl::uint64;
 
-    uint64 max_size = stxxl::atouint64(argv[3]);
+    unsigned_type max_size = atoi(argv[3]);
     uint64* buffer = (uint64*)stxxl::aligned_alloc<4096>(max_size);
 
     try
     {
         stxxl::compat_unique_ptr<stxxl::file>::result file(
-            stxxl::create_file(argv[1], argv[2], stxxl::file::CREAT | stxxl::file::RDWR | stxxl::file::DIRECT));
+            stxxl::create_file(
+                argv[1], argv[2],
+                stxxl::file::CREAT | stxxl::file::RDWR | stxxl::file::DIRECT)
+            );
         file->set_size(max_size);
 
         stxxl::request_ptr req;
         stxxl::stats_data stats1(*stxxl::stats::get_instance());
-        for (uint64 size = 4096; size < max_size; size *= 2)
+        for (unsigned_type size = 4096; size < max_size; size *= 2)
         {
             //generate data
             for (uint64 i = 0; i < size / sizeof(uint64); ++i)
@@ -47,7 +50,7 @@ int main(int argc, char** argv)
 
             //write
             STXXL_MSG(stxxl::add_IEC_binary_multiplier(size, "B") << "are being written at once");
-            req = file->awrite(buffer, 0, size, stxxl::default_completion_handler());
+            req = file->awrite(buffer, 0, size);
             wait_all(&req, 1);
 
             //fill with wrong data
@@ -56,7 +59,7 @@ int main(int argc, char** argv)
 
             //read again
             STXXL_MSG(stxxl::add_IEC_binary_multiplier(size, "B") << "are being read at once");
-            req = file->aread(buffer, 0, size, stxxl::default_completion_handler());
+            req = file->aread(buffer, 0, size);
             wait_all(&req, 1);
 
             //check
diff --git a/tests/io/test_mmap.cpp b/tests/io/test_mmap.cpp
index ed5f526..f819ea7 100644
--- a/tests/io/test_mmap.cpp
+++ b/tests/io/test_mmap.cpp
@@ -24,7 +24,7 @@ struct my_handler
 void testIO()
 {
     const int size = 1024 * 384;
-    char* buffer = static_cast<char*>(stxxl::aligned_alloc<BLOCK_ALIGN>(size));
+    char* buffer = static_cast<char*>(stxxl::aligned_alloc<STXXL_BLOCK_ALIGN>(size));
     memset(buffer, 0, size);
 #if STXXL_WINDOWS
     const char* paths[2] = { "data1", "data2" };
@@ -43,7 +43,7 @@ void testIO()
 
     stxxl::wait_all(req, 16);
 
-    stxxl::aligned_dealloc<BLOCK_ALIGN>(buffer);
+    stxxl::aligned_dealloc<STXXL_BLOCK_ALIGN>(buffer);
 
 #if !STXXL_WINDOWS
     file1.close_remove();
diff --git a/tests/io/test_sim_disk.cpp b/tests/io/test_sim_disk.cpp
index 0f9140f..6f638e2 100644
--- a/tests/io/test_sim_disk.cpp
+++ b/tests/io/test_sim_disk.cpp
@@ -21,13 +21,12 @@
 using stxxl::file;
 using stxxl::timestamp;
 
-
 int main()
 {
     const stxxl::int64 disk_size = stxxl::int64(1024 * 1024) * 1024 * 40;
     std::cout << sizeof(void*) << std::endl;
     const int block_size = 4 * 1024 * 1024;
-    char* buffer = static_cast<char*>(stxxl::aligned_alloc<BLOCK_ALIGN>(block_size));
+    char* buffer = static_cast<char*>(stxxl::aligned_alloc<STXXL_BLOCK_ALIGN>(block_size));
     memset(buffer, 0, block_size);
     const char* paths[2] = { "/tmp/data1", "/tmp/data2" };
     stxxl::sim_disk_file file1(paths[0], file::CREAT | file::RDWR | file::DIRECT, 0);
@@ -42,13 +41,13 @@ int main()
 
     stxxl::request_ptr req;
 
-    STXXL_MSG("Estimated time:" << block_size / double(AVERAGE_SPEED));
+    STXXL_MSG("Estimated time:" << block_size / stxxl::simdisk_geometry::s_average_speed);
     STXXL_MSG("Sequential write");
 
     for (i = 0; i < 40; i++)
     {
         double begin = timestamp();
-        req = file1.awrite(buffer, pos, block_size, stxxl::default_completion_handler());
+        req = file1.awrite(buffer, pos, block_size);
         req->wait();
         double end = timestamp();
 
@@ -65,7 +64,7 @@ int main()
     {
         pos = (stxxl::int64)rnd(disk_size / block_size) * block_size;
         double begin = timestamp();
-        req = file1.awrite(buffer, pos, block_size, stxxl::default_completion_handler());
+        req = file1.awrite(buffer, pos, block_size);
         req->wait();
         double diff = timestamp() - begin;
 
@@ -81,7 +80,7 @@ int main()
     double err = sqrt(sum2 - sum * sum);
     STXXL_MSG("Standard Deviation: " << err << " s, " << 100. * (err / sum) << " %");
 
-    stxxl::aligned_dealloc<BLOCK_ALIGN>(buffer);
+    stxxl::aligned_dealloc<STXXL_BLOCK_ALIGN>(buffer);
 
     file1.close_remove();
     file2.close_remove();
diff --git a/tests/mng/test_block_manager.cpp b/tests/mng/test_block_manager.cpp
index 34fee4e..40f689f 100644
--- a/tests/mng/test_block_manager.cpp
+++ b/tests/mng/test_block_manager.cpp
@@ -63,7 +63,6 @@ int main()
     for (i = 0; i < nblocks; ++i)
         reqs[i] = block->write(bids[i], my_handler());
 
-
     std::cout << "Waiting " << std::endl;
     stxxl::wait_all(reqs, nblocks);
 
@@ -79,7 +78,6 @@ int main()
         }
     }
 
-
     bm->delete_blocks(bids.begin(), bids.end());
 
     delete[] reqs;
diff --git a/tests/mng/test_block_manager1.cpp b/tests/mng/test_block_manager1.cpp
index 540af7d..7458990 100644
--- a/tests/mng/test_block_manager1.cpp
+++ b/tests/mng/test_block_manager1.cpp
@@ -13,7 +13,6 @@
 #include <stxxl/request>
 #include <stxxl/mng>
 
-
 int main()
 {
     typedef stxxl::typed_block<128* 1024, double> block_type;
diff --git a/tests/mng/test_block_manager2.cpp b/tests/mng/test_block_manager2.cpp
index e5a34d2..bdca0ef 100644
--- a/tests/mng/test_block_manager2.cpp
+++ b/tests/mng/test_block_manager2.cpp
@@ -25,7 +25,9 @@ int main()
 
     for (size_t i = 0; i < config->disks_number(); ++i)
         totalsize += config->disk_size(i);
-    stxxl::unsigned_type totalblocks = totalsize / block_type::raw_size;
+
+    stxxl::unsigned_type totalblocks =
+        (stxxl::unsigned_type)(totalsize / block_type::raw_size);
 
     STXXL_MSG("external memory: " << totalsize << " bytes  ==  " << totalblocks << " blocks");
 
diff --git a/tests/mng/test_bmlayer.cpp b/tests/mng/test_bmlayer.cpp
index cea2b2c..dd5e1ca 100644
--- a/tests/mng/test_bmlayer.cpp
+++ b/tests/mng/test_bmlayer.cpp
@@ -56,7 +56,6 @@ void testIO()
     for (i = 0; i < nblocks; ++i)
         reqs[i] = block->write(bids[i], my_handler());
 
-
     std::cout << "Waiting " << std::endl;
     stxxl::wait_all(reqs, nblocks);
 
@@ -98,7 +97,6 @@ void testIO2()
     delete[] blocks;
 }
 
-
 void testPrefetchPool()
 {
     stxxl::prefetch_pool<block_type> pool(2);
diff --git a/tests/mng/test_buf_streams.cpp b/tests/mng/test_buf_streams.cpp
index e3df26c..9822371 100644
--- a/tests/mng/test_buf_streams.cpp
+++ b/tests/mng/test_buf_streams.cpp
@@ -19,7 +19,6 @@
 #include <stxxl/bits/mng/buf_istream.h>
 #include <stxxl/bits/mng/buf_istream_reverse.h>
 
-
 #define BLOCK_SIZE (1024 * 512)
 
 typedef stxxl::typed_block<BLOCK_SIZE, unsigned> block_type;
diff --git a/tests/stream/test_loop.cpp b/tests/stream/test_loop.cpp
index a73c3f1..9901cc0 100644
--- a/tests/stream/test_loop.cpp
+++ b/tests/stream/test_loop.cpp
@@ -183,13 +183,14 @@ struct shuffle
                 stxxl::uint64 combined = current;
                 combined = combined << 32 | *input;
                 combined = (1ul << count_bits(combined)) - 1;
-                current = combined >> 32;
+                current = (value_type)(combined >> 32);
                 next = (value_type)combined;
             }
         }
     }
 
-    shuffle(Input& _input) : input(_input), current(0), next(0), even(true), is_empty(false)
+    shuffle(Input& _input)
+        : input(_input), current(0), next(0), even(true), is_empty(false)
     {
         apply_shuffle();
     }
diff --git a/tests/stream/test_naive_transpose.cpp b/tests/stream/test_naive_transpose.cpp
index e7d7cb5..13800d9 100644
--- a/tests/stream/test_naive_transpose.cpp
+++ b/tests/stream/test_naive_transpose.cpp
@@ -25,7 +25,6 @@
 #include <stxxl/stream>
 #include <stxxl/vector>
 
-
 class streamop_matrix_transpose
 {
     unsigned cut, repeat;
@@ -125,11 +124,7 @@ int main()
 
     // HERE streaming part begins (streamifying)
     // create input stream
-#if STXXL_WINDOWS
     typedef stxxl::stream::streamify_traits<array_type::iterator>::stream_type input_stream_type;
-#else
-    typedef __typeof__ (stxxl::stream::streamify(input.begin(), input.end())) input_stream_type;
-#endif
     input_stream_type input_stream = stxxl::stream::streamify(input.begin(), input.end(), numbuffers);
 
     // create stream of destination indices
diff --git a/tests/stream/test_sorted_runs.cpp b/tests/stream/test_sorted_runs.cpp
index 43554b0..a96b4e8 100644
--- a/tests/stream/test_sorted_runs.cpp
+++ b/tests/stream/test_sorted_runs.cpp
@@ -42,7 +42,6 @@ struct Cmp : public std::binary_function<value_type, value_type, bool>
     }
 };
 
-
 int main()
 {
 #if STXXL_PARALLEL_MULTIWAY_MERGE
diff --git a/tests/stream/test_stream.cpp b/tests/stream/test_stream.cpp
index 848c785..6dd9242 100644
--- a/tests/stream/test_stream.cpp
+++ b/tests/stream/test_stream.cpp
@@ -22,7 +22,6 @@
 #include <stxxl/stream>
 #include <stxxl/vector>
 
-
 #define USE_FORMRUNS_N_MERGE    // comment if you want to use one 'sort' algorithm
                                 // without producing intermediate sorted runs.
 
@@ -56,10 +55,8 @@ using stxxl::stream::streamify_traits;
 using stxxl::stream::make_tuple;
 using stxxl::tuple;
 
-
 const char* phrase = "Hasta la vista, baby";
 
-
 template <class Container_, class It_>
 void fill_input_array(Container_& container, It_ p)
 {
@@ -153,21 +150,11 @@ int main()
 
     // HERE streaming part begins (streamifying)
     // create input stream
-#if STXXL_MSVC
     typedef streamify_traits<input_array_type::iterator>::stream_type input_stream_type;
-#else
-    typedef __typeof__ (streamify (input.begin(), input.end())) input_stream_type;
-#endif
-
     input_stream_type input_stream = streamify(input.begin(), input.end());
 
-
     // create counter stream
-#if STXXL_WINDOWS
     typedef stxxl::stream::generator2stream<counter_type> counter_stream_type;
-#else
-    typedef __typeof__ (streamify (counter_type())) counter_stream_type;
-#endif
     counter_stream_type counter_stream = streamify(counter_type());
 
     // create tuple stream
@@ -199,7 +186,6 @@ int main()
     // or materialize(sorted_stream,output.begin());
     STXXL_CHECK(o == output.end());
 
-
     STXXL_MSG("input string (character,position) :");
     for (unsigned i = 0; i < input.size(); ++i)
     {
diff --git a/tests/stream/test_stream1.cpp b/tests/stream/test_stream1.cpp
index f9f2bb6..cfae84c 100644
--- a/tests/stream/test_stream1.cpp
+++ b/tests/stream/test_stream1.cpp
@@ -13,7 +13,6 @@
 #include <limits>
 #include <stxxl/stream>
 
-
 struct Input
 {
     typedef unsigned value_type;
@@ -74,7 +73,7 @@ int main()
     std::cout << *s;
 
     STXXL_MSG("Size of block type " << sizeof(CreateRunsAlg::block_type));
-    unsigned size = MULT * 1024 * 128 / (sizeof(Input::value_type) * 2);
+    unsigned size = MULT * 1024 * 128 / (unsigned)(sizeof(Input::value_type) * 2);
     Input in(size + 1);
     CreateRunsAlg SortedRuns(in, Cmp(), 1024 * 128 * MULT);
     SortedRunsType Runs = SortedRuns.result();
diff --git a/tools/benchmark_disks.cpp b/tools/benchmark_disks.cpp
index 17fe81c..a17308d 100644
--- a/tools/benchmark_disks.cpp
+++ b/tools/benchmark_disks.cpp
@@ -34,6 +34,8 @@
 #include <stxxl/bits/common/cmdline.h>
 
 using stxxl::timestamp;
+using stxxl::unsigned_type;
+using stxxl::uint64;
 
 #ifdef BLOCK_ALIGN
  #undef BLOCK_ALIGN
@@ -45,24 +47,25 @@ using stxxl::timestamp;
 
 #define CHECK_AFTER_READ 0
 
-#define MB (1024 * 1024)
+#define KiB (1024)
+#define MiB (1024 * 1024)
 
-template <typename AllocStrategy>
-int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
-                          std::string optrw)
+template <unsigned_type RawBlockSize, typename AllocStrategy>
+int benchmark_disks_blocksize_alloc(uint64 length, uint64 batch_size,
+                                    std::string optrw)
 {
-    stxxl::uint64 offset = 0, endpos = offset + length;
+    uint64 offset = 0, endpos = offset + length;
 
     if (length == 0)
-        endpos = std::numeric_limits<stxxl::uint64>::max();
+        endpos = std::numeric_limits<uint64>::max();
 
     bool do_read = (optrw.find('r') != std::string::npos);
     bool do_write = (optrw.find('w') != std::string::npos);
 
     // construct block type
 
-    const unsigned raw_block_size = 8 * MB;
-    const unsigned block_size = raw_block_size / sizeof(int);
+    const unsigned_type raw_block_size = RawBlockSize;
+    const unsigned_type block_size = raw_block_size / sizeof(int);
 
     typedef stxxl::typed_block<raw_block_size, unsigned> block_type;
     typedef stxxl::BID<raw_block_size> BID_type;
@@ -73,7 +76,7 @@ int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
     // calculate total bytes processed in a batch
     batch_size = raw_block_size * batch_size;
 
-    stxxl::uint64 num_blocks_per_batch = stxxl::div_ceil(batch_size, raw_block_size);
+    unsigned_type num_blocks_per_batch = (unsigned_type)stxxl::div_ceil(batch_size, raw_block_size);
     batch_size = num_blocks_per_batch * raw_block_size;
 
     block_type* buffer = new block_type[num_blocks_per_batch];
@@ -92,7 +95,7 @@ int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
     // touch data, so it is actually allcoated
     for (unsigned j = 0; j < num_blocks_per_batch; ++j)
         for (unsigned i = 0; i < block_size; ++i)
-            buffer[j][i] = j * block_size + i;
+            buffer[j][i] = (unsigned)(j * block_size + i);
 
     try {
         AllocStrategy alloc;
@@ -102,11 +105,11 @@ int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
 #if CHECK_AFTER_READ
             const stxxl::int64 current_batch_size_int = current_batch_size / sizeof(int);
 #endif
-            const stxxl::uint64 current_num_blocks_per_batch = stxxl::div_ceil(current_batch_size, raw_block_size);
+            const unsigned_type current_num_blocks_per_batch = (unsigned_type)stxxl::div_ceil(current_batch_size, raw_block_size);
 
-            std::cout << "Offset    " << std::setw(7) << offset / MB << " MiB: " << std::fixed;
+            std::cout << "Offset    " << std::setw(7) << offset / MiB << " MiB: " << std::fixed;
 
-            stxxl::unsigned_type num_total_blocks = blocks.size();
+            unsigned_type num_total_blocks = blocks.size();
             blocks.resize(num_total_blocks + current_num_blocks_per_batch);
             stxxl::block_manager::get_instance()->new_blocks(alloc, blocks.begin() + num_total_blocks, blocks.end());
 
@@ -127,8 +130,7 @@ int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
             else
                 elapsed = 0.0;
 
-            std::cout << std::setw(5) << std::setprecision(1) << (double(current_batch_size) / MB / elapsed) << " MiB/s write, ";
-
+            std::cout << std::setw(5) << std::setprecision(1) << (double(current_batch_size) / MiB / elapsed) << " MiB/s write, ";
 
             begin = timestamp();
 
@@ -147,7 +149,7 @@ int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
             else
                 elapsed = 0.0;
 
-            std::cout << std::setw(5) << std::setprecision(1) << (double(current_batch_size) / MB / elapsed) << " MiB/s read" << std::endl;
+            std::cout << std::setw(5) << std::setprecision(1) << (double(current_batch_size) / MiB / elapsed) << " MiB/s read" << std::endl;
 
 #if CHECK_AFTER_READ
             for (unsigned j = 0; j < current_num_blocks_per_batch; j++)
@@ -179,9 +181,9 @@ int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
     }
 
     std::cout << "=============================================================================================" << std::endl;
-    std::cout << "# Average over " << std::setw(7) << totalsizewrite / MB << " MiB: ";
-    std::cout << std::setw(5) << std::setprecision(1) << (double(totalsizewrite) / MB / totaltimewrite) << " MiB/s write, ";
-    std::cout << std::setw(5) << std::setprecision(1) << (double(totalsizeread) / MB / totaltimeread) << " MiB/s read" << std::endl;
+    std::cout << "# Average over " << std::setw(7) << totalsizewrite / MiB << " MiB: ";
+    std::cout << std::setw(5) << std::setprecision(1) << (double(totalsizewrite) / MiB / totaltimewrite) << " MiB/s write, ";
+    std::cout << std::setw(5) << std::setprecision(1) << (double(totalsizeread) / MiB / totaltimeread) << " MiB/s read" << std::endl;
 
     delete[] reqs;
     delete[] buffer;
@@ -189,21 +191,69 @@ int benchmark_disks_alloc(stxxl::uint64 length, stxxl::uint64 batch_size,
     return 0;
 }
 
+template <typename AllocStrategy> // maps the runtime block_size onto a compile-time RawBlockSize
+int benchmark_disks_alloc(uint64 length, uint64 batch_size,
+                          unsigned_type block_size,
+                          std::string optrw)
+{
+#define run(bs) benchmark_disks_blocksize_alloc<bs, AllocStrategy>(length, batch_size, optrw)
+    if (block_size == 4 * KiB)
+        return run(4 * KiB); // propagate the benchmark's result to the caller
+    else if (block_size == 8 * KiB)
+        return run(8 * KiB);
+    else if (block_size == 16 * KiB)
+        return run(16 * KiB);
+    else if (block_size == 32 * KiB)
+        return run(32 * KiB);
+    else if (block_size == 64 * KiB)
+        return run(64 * KiB);
+    else if (block_size == 128 * KiB)
+        return run(128 * KiB);
+    else if (block_size == 256 * KiB)
+        return run(256 * KiB);
+    else if (block_size == 512 * KiB)
+        return run(512 * KiB);
+    else if (block_size == 1 * MiB)
+        return run(1 * MiB);
+    else if (block_size == 2 * MiB)
+        return run(2 * MiB);
+    else if (block_size == 4 * MiB)
+        return run(4 * MiB);
+    else if (block_size == 8 * MiB)
+        return run(8 * MiB);
+    else if (block_size == 16 * MiB)
+        return run(16 * MiB);
+    else if (block_size == 32 * MiB)
+        return run(32 * MiB);
+    else if (block_size == 64 * MiB)
+        return run(64 * MiB);
+    else if (block_size == 128 * MiB)
+        return run(128 * MiB);
+    else
+        std::cerr << "Unsupported block_size " << block_size << "." << std::endl
+                  << "Available are only powers of two from 4 KiB to 128 MiB. You must use 'ki' instead of 'k'." << std::endl;
+#undef run
+
+    return -1; // only reached for an unsupported block_size: report failure, not success
+}
+
 int benchmark_disks(int argc, char* argv[])
 {
     // parse command line
 
     stxxl::cmdline_parser cp;
 
-    stxxl::uint64 length = 0;
+    uint64 length = 0;
     unsigned int batch_size = 0;
+    unsigned_type block_size = 8 * MiB;
     std::string optrw = "rw", allocstr;
 
     cp.add_param_bytes("size", "Amount of data to write/read from disks (e.g. 10GiB)", length);
     cp.add_opt_param_string("r|w", "Only read or write blocks (default: both write and read)", optrw);
     cp.add_opt_param_string("alloc", "Block allocation strategy: RC, SR, FR, striping. (default: RC)", allocstr);
 
-    cp.add_uint('b', "batch", "Number of blocks written/read in one batch (default: D * 8MiB)", batch_size);
+    cp.add_uint('b', "batch", "Number of blocks written/read in one batch (default: D * B)", batch_size);
+    cp.add_bytes('B', "block_size", "Size of blocks written in one syscall. (default: B = 8MiB)", block_size);
 
     cp.set_description(
         "This program will benchmark the disks configured by the standard "
@@ -221,18 +271,18 @@ int benchmark_disks(int argc, char* argv[])
     if (allocstr.size())
     {
         if (allocstr == "RC")
-            return benchmark_disks_alloc<stxxl::RC>(length, batch_size, optrw);
+            return benchmark_disks_alloc<stxxl::RC>(length, batch_size, block_size, optrw);
         if (allocstr == "SR")
-            return benchmark_disks_alloc<stxxl::SR>(length, batch_size, optrw);
+            return benchmark_disks_alloc<stxxl::SR>(length, batch_size, block_size, optrw);
         if (allocstr == "FR")
-            return benchmark_disks_alloc<stxxl::FR>(length, batch_size, optrw);
+            return benchmark_disks_alloc<stxxl::FR>(length, batch_size, block_size, optrw);
         if (allocstr == "striping")
-            return benchmark_disks_alloc<stxxl::striping>(length, batch_size, optrw);
+            return benchmark_disks_alloc<stxxl::striping>(length, batch_size, block_size, optrw);
 
         std::cout << "Unknown allocation strategy '" << allocstr << "'" << std::endl;
         cp.print_usage();
         return -1;
     }
 
-    return benchmark_disks_alloc<STXXL_DEFAULT_ALLOC_STRATEGY>(length, batch_size, optrw);
+    return benchmark_disks_alloc<STXXL_DEFAULT_ALLOC_STRATEGY>(length, batch_size, block_size, optrw);
 }
diff --git a/tools/benchmark_disks_random.cpp b/tools/benchmark_disks_random.cpp
index ba46b21..71e304f 100644
--- a/tools/benchmark_disks_random.cpp
+++ b/tools/benchmark_disks_random.cpp
@@ -55,10 +55,12 @@ void run_test(stxxl::int64 span, stxxl::int64 worksize, bool do_init, bool do_re
     typedef stxxl::typed_block<raw_block_size, unsigned> block_type;
     typedef stxxl::BID<raw_block_size> BID_type;
 
-    stxxl::int64 num_blocks = stxxl::div_ceil(worksize, raw_block_size);
-    stxxl::int64 num_blocks_in_span = stxxl::div_ceil(span, raw_block_size);
-    num_blocks = stxxl::STXXL_MIN(num_blocks, num_blocks_in_span);
+    stxxl::unsigned_type num_blocks =
+        (stxxl::unsigned_type)stxxl::div_ceil(worksize, raw_block_size);
+    stxxl::unsigned_type num_blocks_in_span =
+        (stxxl::unsigned_type)stxxl::div_ceil(span, raw_block_size);
 
+    num_blocks = stxxl::STXXL_MIN(num_blocks, num_blocks_in_span);
     if (num_blocks == 0) num_blocks = num_blocks_in_span;
 
     worksize = num_blocks * raw_block_size;
diff --git a/tools/benchmark_files.cpp b/tools/benchmark_files.cpp
index 7af573a..dab571e 100644
--- a/tools/benchmark_files.cpp
+++ b/tools/benchmark_files.cpp
@@ -31,11 +31,11 @@
 #include <stxxl/bits/version.h>
 #include <stxxl/bits/common/cmdline.h>
 
-
 using stxxl::request_ptr;
 using stxxl::file;
 using stxxl::timestamp;
-
+using stxxl::unsigned_type;
+using stxxl::uint64;
 
 #ifdef BLOCK_ALIGN
  #undef BLOCK_ALIGN
@@ -45,7 +45,6 @@ using stxxl::timestamp;
 
 #define POLL_DELAY 1000
 
-
 #if STXXL_WINDOWS
 const char* default_file_type = "wincall";
 #else
@@ -78,7 +77,6 @@ void watch_times(request_ptr reqs[], unsigned n, double* out)
     delete[] finished;
 }
 
-
 void out_stat(double start, double end, double* times, unsigned n, const std::vector<std::string>& names)
 {
     for (unsigned i = 0; i < n; i++)
@@ -96,18 +94,18 @@ static inline double throughput(stxxl::int64 bytes, double seconds)
 {
     if (seconds == 0.0)
         return 0.0;
-    return bytes / (1024 * 1024) / seconds;
+    return (double)bytes / (1024 * 1024) / seconds;
 }
 
 int benchmark_files(int argc, char* argv[])
 {
-    stxxl::uint64 offset = 0, length = 0;
+    uint64 offset = 0, length = 0;
 
     bool no_direct_io = false;
     bool sync_io = false;
     bool resize_after_open = false;
     std::string file_type = default_file_type;
-    stxxl::uint64 block_size = 0;
+    unsigned_type block_size = 0;
     unsigned int batch_size = 1;
     std::string opstr = "wv";
     unsigned pattern = 0;
@@ -145,7 +143,7 @@ int benchmark_files(int argc, char* argv[])
     if (!cp.process(argc, argv))
         return -1;
 
-    stxxl::uint64 endpos = offset + length;
+    uint64 endpos = offset + length;
 
     if (block_size == 0)
         block_size = 8 * MB;
@@ -187,9 +185,9 @@ int benchmark_files(int argc, char* argv[])
     const size_t nfiles = files_arr.size();
     bool verify_failed = false;
 
-    const stxxl::unsigned_type step_size = block_size * batch_size;
-    const stxxl::uint64 block_size_int = block_size / sizeof(int);
-    const stxxl::uint64 step_size_int = step_size / sizeof(int);
+    const unsigned_type step_size = block_size * batch_size;
+    const unsigned_type block_size_int = block_size / sizeof(int);
+    const uint64 step_size_int = step_size / sizeof(int);
 
     unsigned* buffer = (unsigned*)stxxl::aligned_alloc<BLOCK_ALIGN>(step_size * nfiles);
     file** files = new file*[nfiles];
@@ -234,11 +232,11 @@ int benchmark_files(int argc, char* argv[])
 
     stxxl::timer t_total(true);
     try {
-        while (offset + stxxl::uint64(step_size) <= endpos || length == 0)
+        while (offset + uint64(step_size) <= endpos || length == 0)
         {
-            const stxxl::uint64 current_step_size = (length == 0) ? stxxl::int64(step_size) : std::min<stxxl::int64>(step_size, endpos - offset);
-            const stxxl::uint64 current_step_size_int = current_step_size / sizeof(int);
-            const stxxl::uint64 current_num_blocks = stxxl::div_ceil(current_step_size, block_size);
+            const uint64 current_step_size = (length == 0) ? stxxl::int64(step_size) : std::min<stxxl::int64>(step_size, endpos - offset);
+            const uint64 current_step_size_int = current_step_size / sizeof(int);
+            const unsigned_type current_num_blocks = (unsigned_type)stxxl::div_ceil(current_step_size, block_size);
 
             std::cout << "File offset    " << std::setw(8) << offset / MB << " MiB: " << std::fixed;
 
@@ -247,7 +245,7 @@ int benchmark_files(int argc, char* argv[])
             if (do_write)
             {
                 // write block number (512 byte blocks) into each block at position 42 * sizeof(unsigned)
-                for (stxxl::uint64 j = 42, b = offset >> 9; j < current_step_size_int; j += 512 / sizeof(unsigned), ++b)
+                for (uint64 j = 42, b = offset >> 9; j < current_step_size_int; j += 512 / sizeof(unsigned), ++b)
                 {
                     for (unsigned i = 0; i < nfiles; i++)
                         buffer[current_step_size_int * i + j] = (unsigned int)b;
@@ -255,12 +253,11 @@ int benchmark_files(int argc, char* argv[])
 
                 for (unsigned i = 0; i < nfiles; i++)
                 {
-                    for (unsigned j = 0; j < current_num_blocks; j++)
+                    for (unsigned_type j = 0; j < current_num_blocks; j++)
                         reqs[i * current_num_blocks + j] =
                             files[i]->awrite(buffer + current_step_size_int * i + j * block_size_int,
                                              offset + j * block_size,
-                                             block_size,
-                                             stxxl::default_completion_handler());
+                                             (unsigned_type)block_size);
                 }
 
  #ifdef WATCH_TIMES
@@ -291,9 +288,10 @@ int benchmark_files(int argc, char* argv[])
             out_stat(begin, end, w_finish_times, nfiles, files_arr);
  #endif
             std::cout << std::setw(2) << nfiles << " * "
-                      << std::setw(8) << std::setprecision(3) << (throughput(current_step_size, elapsed)) << " = "
-                      << std::setw(8) << std::setprecision(3) << (throughput(current_step_size, elapsed) * nfiles) << " MiB/s write,";
-
+                      << std::setw(8) << std::setprecision(3)
+                      << (throughput(current_step_size, elapsed)) << " = "
+                      << std::setw(8) << std::setprecision(3)
+                      << (throughput(current_step_size, elapsed) * (double)nfiles) << " MiB/s write,";
 
             begin = end = timestamp();
 
@@ -302,10 +300,10 @@ int benchmark_files(int argc, char* argv[])
                 for (unsigned i = 0; i < nfiles; i++)
                 {
                     for (unsigned j = 0; j < current_num_blocks; j++)
-                        reqs[i * current_num_blocks + j] = files[i]->aread(buffer + current_step_size_int * i + j * block_size_int,
-                                                                           offset + j * block_size,
-                                                                           block_size,
-                                                                           stxxl::default_completion_handler());
+                        reqs[i * current_num_blocks + j] =
+                            files[i]->aread(buffer + current_step_size_int * i + j * block_size_int,
+                                            offset + j * block_size,
+                                            (unsigned_type)block_size);
                 }
 
  #ifdef WATCH_TIMES
@@ -333,8 +331,10 @@ int benchmark_files(int argc, char* argv[])
 #endif
 
             std::cout << std::setw(2) << nfiles << " * "
-                      << std::setw(8) << std::setprecision(3) << (throughput(current_step_size, elapsed)) << " = "
-                      << std::setw(8) << std::setprecision(3) << (throughput(current_step_size, elapsed) * nfiles) << " MiB/s read";
+                      << std::setw(8) << std::setprecision(3)
+                      << (throughput(current_step_size, elapsed)) << " = "
+                      << std::setw(8) << std::setprecision(3)
+                      << (throughput(current_step_size, elapsed) * (double)nfiles) << " MiB/s read";
 
 #ifdef WATCH_TIMES
             out_stat(begin, end, r_finish_times, nfiles, files_arr);
@@ -345,8 +345,8 @@ int benchmark_files(int argc, char* argv[])
                 for (unsigned d = 0; d < nfiles; ++d)
                 {
                     for (unsigned s = 0; s < (current_step_size >> 9); ++s) {
-                        stxxl::uint64 i = d * current_step_size_int + s * (512 / sizeof(unsigned)) + 42;
-                        stxxl::uint64 b = (offset >> 9) + s;
+                        uint64 i = d * current_step_size_int + s * (512 / sizeof(unsigned)) + 42;
+                        uint64 b = (offset >> 9) + s;
                         if (buffer[i] != b)
                         {
                             verify_failed = true;
@@ -358,12 +358,12 @@ int benchmark_files(int argc, char* argv[])
                     }
                 }
 
-                for (stxxl::uint64 i = 0; i < nfiles * current_step_size_int; i++)
+                for (uint64 i = 0; i < nfiles * current_step_size_int; i++)
                 {
                     if (buffer[i] != (pattern ? pattern : i))
                     {
                         stxxl::int64 ibuf = i / current_step_size_int;
-                        stxxl::uint64 pos = i % current_step_size_int;
+                        uint64 pos = i % current_step_size_int;
 
                         std::cout << std::endl
                                   << "Error on file " << ibuf << " position " << std::hex << std::setw(8) << offset + pos * sizeof(int)
@@ -392,21 +392,32 @@ int benchmark_files(int argc, char* argv[])
     // the following line of output is parsed by misc/filebench-avgplot.sh
     std::cout << "# Average over " << std::setw(8) << stxxl::STXXL_MAX(totalsizewrite, totalsizeread) / MB << " MiB: ";
     std::cout << std::setw(2) << nfiles << " * "
-              << std::setw(8) << std::setprecision(3) << (throughput(totalsizewrite, totaltimewrite)) << " = "
-              << std::setw(8) << std::setprecision(3) << (throughput(totalsizewrite, totaltimewrite) * nfiles) << " MiB/s write,";
+              << std::setw(8) << std::setprecision(3)
+              << (throughput(totalsizewrite, totaltimewrite)) << " = "
+              << std::setw(8) << std::setprecision(3)
+              << (throughput(totalsizewrite, totaltimewrite) * (double)nfiles) << " MiB/s write,";
+
     std::cout << std::setw(2) << nfiles << " * "
-              << std::setw(8) << std::setprecision(3) << (throughput(totalsizeread, totaltimeread)) << " = "
-              << std::setw(8) << std::setprecision(3) << (throughput(totalsizeread, totaltimeread) * nfiles) << " MiB/s read"
+              << std::setw(8) << std::setprecision(3)
+              << (throughput(totalsizeread, totaltimeread)) << " = "
+              << std::setw(8) << std::setprecision(3)
+              << (throughput(totalsizeread, totaltimeread) * (double)nfiles) << " MiB/s read"
               << std::endl;
+
     if (totaltimewrite != 0.0)
         std::cout << "# Write time   " << std::setw(8) << std::setprecision(3) << totaltimewrite << " s" << std::endl;
     if (totaltimeread != 0.0)
         std::cout << "# Read time    " << std::setw(8) << std::setprecision(3) << totaltimeread << " s" << std::endl;
-    std::cout << "# Non-I/O time " << std::setw(8) << std::setprecision(3) << (t_total.seconds() - totaltimewrite - totaltimeread) << " s, average throughput "
-              << std::setw(8) << std::setprecision(3) << (throughput(totalsizewrite + totalsizeread, t_total.seconds() - totaltimewrite - totaltimeread) * nfiles) << " MiB/s"
+
+    std::cout << "# Non-I/O time " << std::setw(8) << std::setprecision(3)
+              << (t_total.seconds() - totaltimewrite - totaltimeread) << " s, average throughput "
+              << std::setw(8) << std::setprecision(3)
+              << (throughput(totalsizewrite + totalsizeread, t_total.seconds() - totaltimewrite - totaltimeread) * (double)nfiles) << " MiB/s"
               << std::endl;
+
     std::cout << "# Total time   " << std::setw(8) << std::setprecision(3) << t_total.seconds() << " s, average throughput "
-              << std::setw(8) << std::setprecision(3) << (throughput(totalsizewrite + totalsizeread, t_total.seconds()) * nfiles) << " MiB/s"
+              << std::setw(8) << std::setprecision(3)
+              << (throughput(totalsizewrite + totalsizeread, t_total.seconds()) * (double)nfiles) << " MiB/s"
               << std::endl;
 
     if (do_verify)
diff --git a/tools/benchmark_pqueue.cpp b/tools/benchmark_pqueue.cpp
index b5e673f..61af1ff 100644
--- a/tools/benchmark_pqueue.cpp
+++ b/tools/benchmark_pqueue.cpp
@@ -31,6 +31,7 @@ static const char* description =
 
 using stxxl::uint32;
 using stxxl::uint64;
+using stxxl::internal_size_type;
 
 #define MiB (1024 * 1024)
 #define PRINTMOD (16 * MiB)
@@ -86,11 +87,11 @@ static inline void progress(const char* text, uint64 i, uint64 nelements)
     if ((i % PRINTMOD) == 0)
         STXXL_MSG(text << " " << i << " ("
                        << std::setprecision(5)
-                       << (i * 100.0 / nelements) << " %)");
+                       << ((double)i * 100.0 / (double)nelements) << " %)");
 }
 
 template <typename PQType>
-void run_pqueue_insert_delete(uint64 nelements, uint64 mem_for_pools)
+void run_pqueue_insert_delete(uint64 nelements, internal_size_type mem_for_pools)
 {
     typedef typename PQType::value_type ValueType;
 
@@ -141,7 +142,7 @@ void run_pqueue_insert_delete(uint64 nelements, uint64 mem_for_pools)
 }
 
 template <typename PQType>
-void run_pqueue_insert_intermixed(uint64 nelements, uint64 mem_for_pools)
+void run_pqueue_insert_intermixed(uint64 nelements, internal_size_type mem_for_pools)
 {
     typedef typename PQType::value_type ValueType;
 
@@ -202,11 +203,13 @@ void run_pqueue_insert_intermixed(uint64 nelements, uint64 mem_for_pools)
     std::cout << stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
 }
 
-template <typename ValueType, uint64 mib_for_queue, uint64 mib_for_pools, uint64 maxvolume>
+template <typename ValueType,
+          internal_size_type mib_for_queue, internal_size_type mib_for_pools,
+          uint64 maxvolume>
 int do_benchmark_pqueue(uint64 volume, int opseq)
 {
-    const uint64 mem_for_queue = mib_for_queue * MiB;
-    const uint64 mem_for_pools = mib_for_pools * MiB;
+    const internal_size_type mem_for_queue = mib_for_queue * MiB;
+    const internal_size_type mem_for_pools = mib_for_pools * MiB;
 
     typedef typename stxxl::PRIORITY_QUEUE_GENERATOR<
             ValueType, my_cmp<ValueType>,
@@ -265,8 +268,10 @@ int do_benchmark_pqueue_config(unsigned pqconfig, uint64 size, unsigned opseq)
         return do_benchmark_pqueue<ValueType, 128, 128, 16>(size, opseq);
     else if (pqconfig == 2)
         return do_benchmark_pqueue<ValueType, 512, 512, 64>(size, opseq);
+#if __x86_64__ || __LP64__ || (__WORDSIZE == 64)
     else if (pqconfig == 3)
         return do_benchmark_pqueue<ValueType, 4096, 4096, 512>(size, opseq);
+#endif
     else
         return 0;
 }
@@ -304,7 +309,14 @@ int benchmark_pqueue(int argc, char* argv[])
     cp.add_uint('t', "type", "Value type of tested priority queue:\n 1 = pair of uint32,\n 2 = pair of uint64 (default),\n 3 = 24 byte struct\n 0 = all of the above", type);
 
     unsigned pqconfig = 2;
-    cp.add_uint('p', "pq", "Priority queue configuration to test:\n 1 = small (256 MiB RAM, 4 GiB elements)\n 2 = medium (1 GiB RAM, 16 GiB elements) (default)\n 3 = big (8 GiB RAM, 64 GiB elements)\n 0 = all of the above", pqconfig);
+    cp.add_uint('p', "pq",
+                "Priority queue configuration to test:\n"
+                "1 = small (256 MiB RAM, 4 GiB elements)\n"
+                "2 = medium (1 GiB RAM, 16 GiB elements) (default)\n"
+#if __x86_64__ || __LP64__ || (__WORDSIZE == 64)
+                "3 = big (8 GiB RAM, 64 GiB elements)\n"
+#endif
+                "0 = all of the above", pqconfig);
 
     unsigned opseq = 1;
     cp.add_uint('o', "opseq", "Operation sequence to perform:\n 1 = insert all, delete all (default)\n 2 = insert all, intermixed insert/delete\n 0 = all of the above", opseq);
diff --git a/tools/benchmark_sort.cpp b/tools/benchmark_sort.cpp
index e7de513..d0a9e28 100644
--- a/tools/benchmark_sort.cpp
+++ b/tools/benchmark_sort.cpp
@@ -27,6 +27,7 @@
 using stxxl::timestamp;
 using stxxl::uint32;
 using stxxl::uint64;
+using stxxl::unsigned_type;
 
 #define MB (1024 * 1024)
 
@@ -123,11 +124,12 @@ class BenchmarkSort
     static void output_result(double elapsed, uint64 vec_size)
     {
         std::cout << "finished in " << elapsed << " seconds @ "
-                  << (vec_size * sizeof(value_type) / MB / elapsed) << " MiB/s" << std::endl;
+                  << ((double)vec_size * sizeof(value_type) / MB / elapsed)
+                  << " MiB/s" << std::endl;
     }
 
 public:
-    BenchmarkSort(const char* desc, uint64 length, uint64 memsize)
+    BenchmarkSort(const char* desc, uint64 length, unsigned_type memsize)
     {
         // construct vector
         typedef typename stxxl::VECTOR_GENERATOR<ValueType>::result vector_type;
@@ -205,17 +207,20 @@ int benchmark_sort(int argc, char* argv[])
     uint64 length = 0;
     cp.add_param_bytes("size", "Amount of data to sort (e.g. 1GiB)", length);
 
-    uint64 memsize = 256 * MB;
+    unsigned_type memsize = 256 * MB;
     cp.add_bytes('M', "ram", "Amount of RAM to use when sorting, default: 256 MiB", memsize);
 
     if (!cp.process(argc, argv))
         return -1;
 
-    BenchmarkSort<pair32_type, stxxl::random_number32>("pair of uint32", length, memsize);
+    BenchmarkSort<pair32_type, stxxl::random_number32>
+        ("pair of uint32", length, (unsigned_type)memsize);
 
-    BenchmarkSort<pair64_type, stxxl::random_number64>("pair of uint64", length, memsize);
+    BenchmarkSort<pair64_type, stxxl::random_number32>
+        ("pair of uint64", length, (unsigned_type)memsize);
 
-    BenchmarkSort<struct64_type, stxxl::random_number64>("struct of 64 bytes", length, memsize);
+    BenchmarkSort<struct64_type, stxxl::random_number32>
+        ("struct of 64 bytes", length, (unsigned_type)memsize);
 
     return 0;
 }
diff --git a/tools/benchmarks/CMakeLists.txt b/tools/benchmarks/CMakeLists.txt
index 3978b8a..e68a149 100644
--- a/tools/benchmarks/CMakeLists.txt
+++ b/tools/benchmarks/CMakeLists.txt
@@ -18,7 +18,7 @@ stxxl_build_test(stack_benchmark)
 
 add_define(benchmark_naive_matrix "STXXL_VERBOSE_LEVEL=0")
 
-if(BUILD_TESTS)
+if(USE_TPIE AND BUILD_TESTS)
   file(GLOB TPIE_DIR_GLOB RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "tpie_*")
 
   foreach(TPIE_TEST ${TPIE_DIR_GLOB})
@@ -30,7 +30,7 @@ if(BUILD_TESTS)
 
   if(EXISTS "${TPIE_DIR}/lib/libtpie.a")
 
-    message("Found compiled TPIE library at ${TPIE_DIR}")
+    message(STATUS "Found compiled TPIE library at ${TPIE_DIR}")
 
     add_library(tpie STATIC IMPORTED)
     set_property(TARGET tpie PROPERTY IMPORTED_LOCATION "${TPIE_DIR}/lib/libtpie.a")
@@ -70,6 +70,6 @@ if(BUILD_TESTS)
       target_link_libraries(berkeley_db_benchmark ${STXXL_LIBRARIES} tpie ${BERKELEYDB_LIBRARIES})
     endif()
   else()
-    message("Could not find TPIE, skipping extra benchmarks.")
+    message(SEND_ERROR "Could not find TPIE for extra benchmarks.")
   endif()
-endif(BUILD_TESTS)
+endif(USE_TPIE AND BUILD_TESTS)
diff --git a/tools/benchmarks/benchmark_naive_matrix.cpp b/tools/benchmarks/benchmark_naive_matrix.cpp
index 10691ef..c30b92f 100644
--- a/tools/benchmarks/benchmark_naive_matrix.cpp
+++ b/tools/benchmarks/benchmark_naive_matrix.cpp
@@ -39,7 +39,6 @@ public:
         return height;
     }
 
-
     T & element(stxxl::unsigned_type x, stxxl::unsigned_type y)
     {
         //row-major
diff --git a/tools/benchmarks/berkeley_db_benchmark.cpp b/tools/benchmarks/berkeley_db_benchmark.cpp
index fa2c6a0..1a3e5d3 100644
--- a/tools/benchmarks/berkeley_db_benchmark.cpp
+++ b/tools/benchmarks/berkeley_db_benchmark.cpp
@@ -18,13 +18,11 @@
 //! Volume 38, Issue 6, Pages 589-637, May 2008
 //! DOI: 10.1002/spe.844
 
-
 #include <stxxl/vector>
 #include <stxxl/map>
 #include <stxxl/timer>
 #include <stxxl/stream>
 
-
 ///// BDB header ////////////
 #include <db_cxx.h>
 
@@ -42,7 +40,6 @@
 #define NODE_BLOCK_SIZE         (32 * 1024)
 #define LEAF_BLOCK_SIZE         (32 * 1024)
 
-
 #define LEAF_BLOCK_SIZE         (32 * 1024)
 
 #define TOTAL_CACHE_SIZE        (750 * 1024 * 1024)
@@ -56,13 +53,11 @@
 
 #define SORTER_MEM              (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4)
 
-
 #define SCAN_LIMIT(x)   (x)
 
 //#define BDB_FILE "/data3/bdb_file"
 #define BDB_FILE "/var/tmp/bdb_file"
 
-
 // BDB settings
 u_int32_t pagesize = LEAF_BLOCK_SIZE;
 u_int bulkbufsize = 4 * 1024 * 1024;
@@ -79,7 +74,6 @@ struct my_key
     char keybuf[KEY_SIZE];
 };
 
-
 std::ostream& operator << (std::ostream& o, const my_key& obj)
 {
     for (int i = 0; i < KEY_SIZE; ++i)
@@ -118,7 +112,6 @@ bool operator >= (const my_key& a, const my_key& b)
     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0;
 }
 
-
 struct my_data
 {
     char databuf[DATA_SIZE];
@@ -149,7 +142,6 @@ struct comp_type : std::binary_function<my_key, my_key, bool>
     }
 };
 
-
 /// TPIE  declarations
 // Key type.
 typedef my_key bkey_t;
@@ -168,7 +160,6 @@ struct key_from_el {
     }
 };
 
-
 // Temporary distinction btw UN*X and WIN, since there are some
 // problems with the MMAP collection implementation.
 #ifdef _WIN32
@@ -177,7 +168,6 @@ typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u
 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t;
 #endif
 
-
 void init()
 {
     memset(max_key.keybuf, std::numeric_limits<unsigned char>::max(), KEY_SIZE);
@@ -194,11 +184,9 @@ typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE>
 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type;
 //typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024>  vector_type;
 
-
 //#define KEYPOS        (i % KEY_SIZE)
 //#define VALUE         (myrand() % 26)
 
-
 #if 0
 unsigned ran32State = 0xdeadbeef;
 inline unsigned myrand()
@@ -239,7 +227,6 @@ void run_bdb_btree(stxxl::int64 ops)
     memset(key1_storage.keybuf, 'a', KEY_SIZE);
     memset(data1_storage.databuf, 'b', DATA_SIZE);
 
-
     Db db(NULL, 0);             // Instantiate the Db object
 
     try {
@@ -255,7 +242,6 @@ void run_bdb_btree(stxxl::int64 ops)
                 DB_CREATE,      // Open flags
                 0);             // File mode (using defaults)
 
-
         // here we start with the tests
         Dbt key1(key1_storage.keybuf, KEY_SIZE);
         Dbt data1(data1_storage.databuf, DATA_SIZE);
@@ -267,7 +253,6 @@ void run_bdb_btree(stxxl::int64 ops)
 
         ran32State = 0xdeadbeef;
 
-
         DB_BTREE_STAT* dbstat;
 
         db.stat(NULL, &dbstat, 0);
@@ -296,7 +281,6 @@ void run_bdb_btree(stxxl::int64 ops)
         Timer.reset();
         Timer.start();
 
-
         Dbc* cursorp;
         db.cursor(NULL, &cursorp, 0);
 
@@ -333,14 +317,12 @@ void run_bdb_btree(stxxl::int64 ops)
             if (last_key < key1_storage)
                 std::swap(last_key, key1_storage);
 
-
             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
             Dbt datax(data1_storage.databuf, DATA_SIZE);
 
             if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
                 continue;
 
-
             while (*((my_key*)keyx.get_data()) <= last_key)
             {
                 ++n_scanned;
@@ -361,7 +343,6 @@ void run_bdb_btree(stxxl::int64 ops)
         if (cursorp != NULL)
             cursorp->close();
 
-
         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
@@ -417,7 +398,6 @@ void run_stxxl_map(stxxl::int64 ops)
     memset(element.first.keybuf, 'a', KEY_SIZE);
     memset(element.second.databuf, 'b', DATA_SIZE);
 
-
     stxxl::timer Timer;
     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
     stxxl::int64 i;
@@ -545,8 +525,8 @@ class rand_key_gen
 public:
     typedef my_key value_type;
 
-    rand_key_gen(stxxl::int64 el, my_key& cur) :
-        counter(el), current(cur)
+    rand_key_gen(stxxl::int64 el, my_key& cur)
+        : counter(el), current(cur)
     {
         //const stxxl::int64  & i = counter;
         //current.keybuf[KEYPOS] = letters[VALUE];
@@ -590,7 +570,6 @@ public:
         if (!empty())
             current.first = *in;
 
-
         return *this;
     }
     bool empty() const { return in.empty(); }
@@ -627,7 +606,6 @@ void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
         stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
     }
 
-
     Timer.stop();
 
     STXXL_MSG("Finished sorting input. Elapsed time: " <<
@@ -644,7 +622,6 @@ void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
 
     Timer.stop();
 
-
     STXXL_MSG("Records in map: " << Map.size());
     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
@@ -803,7 +780,6 @@ public:
     }
 };
 
-
 void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
 {
     el_t element;
@@ -829,7 +805,6 @@ void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
 
     Timer.start();
 
-
     {
         rand_key_gen Gen(n, element.key_);
         typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
@@ -845,11 +820,9 @@ void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
 
     Timer.stop();
 
-
     STXXL_MSG("Finished sorting input. Elapsed time: " <<
               (Timer.mseconds() / 1000.) << " seconds.");
 
-
     Timer.reset();
     Timer.start();
 
@@ -875,18 +848,15 @@ void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
     if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR)
         cerr << "Error during bulk loading.\n";
 
-
     Timer.stop();
 
     STXXL_MSG("Records in map: " << u_btree->size());
     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
 
-
     ////////////////////////////////////////
     Timer.reset();
 
-
     Timer.start();
 
     for (i = 0; i < n_inserts; ++i)
@@ -902,14 +872,11 @@ void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
 
-
     ////////////////////////////////////////////////
     Timer.reset();
 
-
     Timer.start();
 
-
     el_t result;
     for (i = 0; i < n_locates; ++i)
     {
@@ -922,11 +889,9 @@ void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
 
-
     ////////////////////////////////////
     Timer.reset();
 
-
     Timer.start();
 
     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
@@ -943,7 +908,6 @@ void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
         else
             n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter);
 
-
         if (n_scanned >= SCAN_LIMIT(n))
         {
             ++i;
@@ -958,7 +922,6 @@ void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
 
-
     //////////////////////////////////////
     ran32State = 0xdeadbeef;
     memset(element.key_.keybuf, 'a', KEY_SIZE);
@@ -999,7 +962,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
     memset(key1_storage.keybuf, 'a', KEY_SIZE);
     memset(data1_storage.databuf, 'b', DATA_SIZE);
 
-
     Db db(NULL, 0);                   // Instantiate the Db object
 
     try {
@@ -1072,7 +1034,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
         ////////////////////////////////////////
 
-
         Timer.reset();
         Timer.start();
 
@@ -1096,7 +1057,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
         Timer.reset();
         Timer.start();
 
-
         Dbc* cursorp;
         db.cursor(NULL, &cursorp, 0);
 
@@ -1134,7 +1094,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
             if (last_key < key1_storage)
                 std::swap(last_key, key1_storage);
 
-
             //STXXL_MSG("Looking     "<<key1_storage<<" scanned: "<<n_scanned);
             //STXXL_MSG("Upper bound "<<last_key);
 
@@ -1147,12 +1106,10 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
             Dbt datax(data1_storage.databuf, DATA_SIZE);
 #endif
 
-
 #ifdef BDB_BULK_SCAN
             if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND)
                 continue;
 
-
             do
             {
                 DbMultipleKeyDataIterator BulkIterator(datax);
@@ -1166,7 +1123,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
                 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND)
                     break;
 
-
                 if (*((my_key*)keyx.get_data()) > last_key)
                 {
                     break;
@@ -1185,7 +1141,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
             }
 #endif
 
-
             if (n_scanned >= SCAN_LIMIT(n))
             {
                 ++i;
@@ -1199,7 +1154,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
         if (cursorp != NULL)
             cursorp->close();
 
-
         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
@@ -1250,7 +1204,6 @@ void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
 #endif
 }
 
-
 int main(int argc, char* argv[])
 {
     STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes");
@@ -1277,14 +1230,13 @@ int main(int argc, char* argv[])
     init();
 
     int version = atoi(argv[1]);
-    stxxl::int64 ops = stxxl::atoint64(argv[2]);
+    stxxl::uint64 ops = stxxl::atouint64(argv[2]);
 
     STXXL_MSG("Running version      : " << version);
     STXXL_MSG("Operations to perform: " << ops);
     STXXL_MSG("Btree cache size     : " << TOTAL_CACHE_SIZE << " bytes");
     STXXL_MSG("Leaf block size      : " << LEAF_BLOCK_SIZE << " bytes");
 
-
     switch (version)
     {
     case 1:
diff --git a/tools/benchmarks/matrix_benchmark.cpp b/tools/benchmarks/matrix_benchmark.cpp
index ad4498d..fe823dc 100644
--- a/tools/benchmarks/matrix_benchmark.cpp
+++ b/tools/benchmarks/matrix_benchmark.cpp
@@ -19,7 +19,9 @@
 #include <stxxl/bits/containers/matrix.h>
 
 using stxxl::uint64;
+using stxxl::unsigned_type;
 using stxxl::int_type;
+using stxxl::internal_size_type;
 
 int main(int argc, char** argv)
 {
@@ -30,7 +32,7 @@ int main(int argc, char** argv)
     #endif
 
     int rank = 10000;
-    uint64 internal_memory = 256 * 1024 * 1024;
+    internal_size_type internal_memory = 256 * 1024 * 1024;
     int mult_algo_num = 5;
     int sched_algo_num = 2;
 
diff --git a/tools/benchmarks/monotonic_pq.cpp b/tools/benchmarks/monotonic_pq.cpp
index bc4e9f7..773bffc 100644
--- a/tools/benchmarks/monotonic_pq.cpp
+++ b/tools/benchmarks/monotonic_pq.cpp
@@ -28,10 +28,7 @@
 #include <stxxl/stats>
 #include <stxxl/timer>
 
-const stxxl::unsigned_type mega = 1024 * 1024;    //1 * 1024 does not work here
-
-//const int block_size = STXXL_DEFAULT_BLOCK_SIZE(my_type);
-const stxxl::unsigned_type block_size = 4 * mega;
+const stxxl::unsigned_type mega = 1024 * 1024;
 
 #define RECORD_SIZE 16
 #define LOAD 0
@@ -53,12 +50,12 @@ struct my_type
 #endif
 
     my_type() { }
-    my_type(key_type __key) : key(__key) { }
+    my_type(key_type k) : key(k) { }
 #if LOAD
-    my_type(key_type __key, key_type __load) : key(__key), load(__load) { }
+    my_type(key_type k, key_type l) : key(k), load(l) { }
 #endif
 
-    void operator = (const key_type& __key) { key = __key; }
+    void operator = (const key_type& k) { key = k; }
 #if LOAD
     void operator = (const my_type& mt)
     {
@@ -239,7 +236,6 @@ int main(int argc, char* argv[])
 //         BlockSize = Config::BlockSize,
 //         ExtKMAX = Config::ExtKMAX
 
-
 /*  STXXL_MSG ( "Blocks fitting into internal memory m: "<<gen::m );
   STXXL_MSG ( "X : "<<gen::X );  //maximum number of internal elements //X = B * (settings::k - m) / settings::E,
   STXXL_MSG ( "Expected internal memory consumption: "<< (gen::EConsumption / 1048576) << " MiB");*/
@@ -260,7 +256,6 @@ int main(int argc, char* argv[])
     pq_type p(mem_for_pools / 2, mem_for_pools / 2);
     stxxl::int64 nelements = stxxl::int64(megabytes * mega / sizeof(my_type)), i;
 
-
     STXXL_MSG("Internal memory consumption of the priority queue: " << p.mem_cons() << " B");
     STXXL_MSG("Peak number of elements (n): " << nelements);
     STXXL_MSG("Max number of elements to contain: " << (stxxl::uint64(pq_type::N) * pq_type::IntKMAX * pq_type::IntKMAX * pq_type::ExtKMAX * pq_type::ExtKMAX));
@@ -282,7 +277,8 @@ int main(int argc, char* argv[])
     {
         if ((i % mega) == 0)
             STXXL_MSG(
-                std::fixed << std::setprecision(2) << std::setw(5) << (100.0 * i / nelements) << "% "
+                std::fixed << std::setprecision(2) << std::setw(5)
+                           << (100.0 * (double)i / (double)nelements) << "% "
                            << "Inserting element " << i << " top() == " << least.key << " @ "
                            << std::setprecision(3) << Timer.seconds() << " s"
                            << std::setprecision(6) << std::resetiosflags(std::ios_base::floatfield));
@@ -378,7 +374,8 @@ int main(int argc, char* argv[])
 
         if ((i % mega) == 0)
             STXXL_MSG(
-                std::fixed << std::setprecision(2) << std::setw(5) << (100.0 * i / nelements) << "% "
+                std::fixed << std::setprecision(2) << std::setw(5)
+                           << (100.0 * (double)i / (double)nelements) << "% "
                            << "Popped element " << i << " == " << least.key << " @ "
                            << std::setprecision(3) << Timer.seconds() << " s"
                            << std::setprecision(6) << std::resetiosflags(std::ios_base::floatfield));
diff --git a/tools/benchmarks/pq_benchmark.cpp b/tools/benchmarks/pq_benchmark.cpp
index b39f16a..c096d69 100644
--- a/tools/benchmarks/pq_benchmark.cpp
+++ b/tools/benchmarks/pq_benchmark.cpp
@@ -18,7 +18,6 @@
 //! Volume 38, Issue 6, Pages 589-637, May 2008
 //! DOI: 10.1002/spe.844
 
-
 #include <limits>
 #include <stxxl/priority_queue>
 #include <stxxl/stats>
@@ -31,10 +30,8 @@
 #define PREFETCH_POOL_SIZE                      ((TOTAL_PQ_MEM_SIZE - PQ_MEM_SIZE) / 2)
 #define WRITE_POOL_SIZE                                         (PREFETCH_POOL_SIZE)
 
-
 #define MAX_ELEMENTS (2000 * 1024 * 1024)
 
-
 struct my_record
 {
     int key;
@@ -69,7 +66,6 @@ bool operator > (const my_record& a, const my_record& b)
     return a.key > b.key;
 }
 
-
 struct comp_type : std::binary_function<my_record, my_record, bool>
 {
     bool operator () (const my_record& a, const my_record& b) const
@@ -82,7 +78,6 @@ struct comp_type : std::binary_function<my_record, my_record, bool>
     }
 };
 
-
 typedef stxxl::PRIORITY_QUEUE_GENERATOR<my_record, comp_type,
                                         PQ_MEM_SIZE, MAX_ELEMENTS / (1024 / 8)>::result pq_type;
 
@@ -90,7 +85,6 @@ typedef pq_type::block_type block_type;
 
 #define    BLOCK_SIZE block_type::raw_size
 
-
 #if 1
 unsigned ran32State = 0xdeadbeef;
 inline int myrand()
@@ -105,7 +99,6 @@ inline long long unsigned myrand()
 }
 #endif
 
-
 void run_stxxl_insert_all_delete_all(stxxl::uint64 ops)
 {
     pq_type PQ(PREFETCH_POOL_SIZE, WRITE_POOL_SIZE);
@@ -165,7 +158,6 @@ void run_stxxl_insert_all_delete_all(stxxl::uint64 ops)
     std::cout << stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
 }
 
-
 void run_stxxl_intermixed(stxxl::uint64 ops)
 {
     pq_type PQ(PREFETCH_POOL_SIZE, WRITE_POOL_SIZE);
@@ -251,7 +243,6 @@ int main(int argc, char* argv[])
     int version = atoi(argv[1]);
     stxxl::uint64 ops = stxxl::atouint64(argv[2]);
 
-
     STXXL_MSG("Running version      : " << version);
     STXXL_MSG("Operations to perform: " << ops);
 
diff --git a/tools/benchmarks/stack_benchmark.cpp b/tools/benchmarks/stack_benchmark.cpp
index cd0b9c8..d10d3e9 100644
--- a/tools/benchmarks/stack_benchmark.cpp
+++ b/tools/benchmarks/stack_benchmark.cpp
@@ -18,7 +18,6 @@
 //! Volume 38, Issue 6, Pages 589-637, May 2008
 //! DOI: 10.1002/spe.844
 
-
 #include <stxxl/stack>
 #include <stxxl/stats>
 #include <stxxl/timer>
@@ -50,7 +49,6 @@ inline std::ostream& operator << (std::ostream& o, const my_record_<RECORD_SIZE>
     return o;
 }
 
-
 template <typename stack_type>
 void benchmark_insert(stack_type& Stack, stxxl::int64 volume)
 {
@@ -128,7 +126,6 @@ void benchmark_delete(stack_type& Stack, stxxl::int64 volume)
     std::cout << stxxl::stats_data(*stxxl::stats::get_instance()) - stats_begin;
 }
 
-
 template <class my_record>
 void run_stxxl_growshrink2_stack(stxxl::int64 volume)
 {
@@ -146,7 +143,6 @@ void run_stxxl_growshrink2_stack(stxxl::int64 volume)
     benchmark_delete(Stack, volume);
 }
 
-
 template <class my_record>
 void run_stxxl_normal_stack(stxxl::int64 volume)
 {
@@ -159,7 +155,6 @@ void run_stxxl_normal_stack(stxxl::int64 volume)
     benchmark_delete(Stack, volume);
 }
 
-
 template <class my_record>
 void run_stl_stack(stxxl::int64 volume)
 {
@@ -171,7 +166,6 @@ void run_stl_stack(stxxl::int64 volume)
     benchmark_delete(Stack, volume);
 }
 
-
 int main(int argc, char* argv[])
 {
     STXXL_MSG("stxxl::pq block size: " << BLOCK_SIZE << " bytes");
diff --git a/tools/benchmarks/tpie_stack_benchmark.cpp b/tools/benchmarks/tpie_stack_benchmark.cpp
index 54e08bd..8a22619 100644
--- a/tools/benchmarks/tpie_stack_benchmark.cpp
+++ b/tools/benchmarks/tpie_stack_benchmark.cpp
@@ -17,7 +17,6 @@
 //! Volume 38, Issue 6, Pages 589-637, May 2008
 //! DOI: 10.1002/spe.844
 
-
 #include "app_config.h"
 
 #include <portability.h>
@@ -33,12 +32,10 @@
 #include <stxxl/bits/verbose.h>
 #include <stxxl/timer>
 
-
 #define MEM_2_RESERVE    (768 * 1024 * 1024)
 
 #define BLOCK_SIZE       (2 * 1024 * 1024)
 
-
 #ifndef DISKS
  #define DISKS 1
 #endif
@@ -50,7 +47,6 @@ struct my_record_
     my_record_() { }
 };
 
-
 template <class my_record>
 void run_stack(stxxl::int64 volume)
 {
@@ -89,7 +85,6 @@ void run_stack(stxxl::int64 volume)
               " seconds : " << (double(volume) / (1024. * 1024. * Timer.mseconds() / 1000.)) <<
               " MiB/s");
 
-
     ////////////////////////////////////////////////
     Timer.reset();
     Timer.start();
@@ -115,7 +110,6 @@ void run_stack(stxxl::int64 volume)
               " MiB/s");
 }
 
-
 int main(int argc, char* argv[])
 {
     using std::cout;
@@ -150,7 +144,7 @@ int main(int argc, char* argv[])
     }
 
     int version = atoi(argv[1]);
-    stxxl::int64 volume = stxxl::atoint64(argv[2]);
+    stxxl::uint64 volume = stxxl::atouint64(argv[2]);
 
     STXXL_MSG("Allocating array with size " << MEM_2_RESERVE
                                             << " bytes to prevent file buffering.");
diff --git a/tools/create_files.cpp b/tools/create_files.cpp
index daa9e57..22e558b 100644
--- a/tools/create_files.cpp
+++ b/tools/create_files.cpp
@@ -23,11 +23,10 @@
  #include <unistd.h>
 #endif
 
-
 using stxxl::request_ptr;
 using stxxl::file;
 using stxxl::timestamp;
-
+using stxxl::unsigned_type;
 
 #ifdef BLOCK_ALIGN
  #undef BLOCK_ALIGN
@@ -47,7 +46,6 @@ using stxxl::timestamp;
 
 #define CHECK_AFTER_READ 0
 
-
 #ifdef WATCH_TIMES
 void watch_times(request_ptr reqs[], unsigned n, double* out)
 {
@@ -57,7 +55,6 @@ void watch_times(request_ptr reqs[], unsigned n, double* out)
     for (i = 0; i < n; i++)
         finished[i] = false;
 
-
     while (count != n)
     {
         usleep(POLL_DELAY);
@@ -76,7 +73,6 @@ void watch_times(request_ptr reqs[], unsigned n, double* out)
     delete[] finished;
 }
 
-
 void out_stat(double start, double end, double* times, unsigned n, const std::vector<std::string>& names)
 {
     for (unsigned i = 0; i < n; i++)
@@ -112,15 +108,15 @@ int create_files(int argc, char* argv[])
     const size_t ndisks = disks_arr.size();
 
 #if STXXL_WINDOWS
-    unsigned buffer_size = 64 * MB;
+    unsigned_type buffer_size = 64 * MB;
 #else
-    unsigned buffer_size = 256 * MB;
+    unsigned_type buffer_size = 256 * MB;
 #endif
-    const unsigned buffer_size_int = buffer_size / sizeof(int);
+    const unsigned_type buffer_size_int = buffer_size / sizeof(int);
 
     unsigned chunks = 2;
-    const unsigned chunk_size = buffer_size / chunks;
-    const unsigned chunk_size_int = chunk_size / sizeof(int);
+    const unsigned_type chunk_size = buffer_size / chunks;
+    const unsigned_type chunk_size_int = chunk_size / sizeof(int);
 
     unsigned i = 0, j = 0;
 
@@ -158,8 +154,12 @@ int create_files(int argc, char* argv[])
 
     while (offset < endpos)
     {
-        const stxxl::int64 current_block_size = length ? std::min<stxxl::int64>(buffer_size, endpos - offset) : buffer_size;
-        const stxxl::int64 current_chunk_size = current_block_size / chunks;
+        const unsigned_type current_block_size =
+            length
+            ? (unsigned_type)std::min<stxxl::int64>(buffer_size, endpos - offset)
+            : buffer_size;
+
+        const unsigned_type current_chunk_size = current_block_size / chunks;
 
         std::cout << "Disk offset " << std::setw(7) << offset / MB << " MiB: " << std::fixed;
 
@@ -172,8 +172,7 @@ int create_files(int argc, char* argv[])
                 reqs[i * chunks + j] =
                     disks[i]->awrite(buffer + buffer_size_int * i + j * chunk_size_int,
                                      offset + j * current_chunk_size,
-                                     current_chunk_size,
-                                     stxxl::default_completion_handler());
+                                     current_chunk_size);
         }
 
  #ifdef WATCH_TIMES
@@ -199,7 +198,6 @@ int create_files(int argc, char* argv[])
         std::cout << std::setw(7) << int(double(current_block_size) / MB / (end - begin)) << " MiB/s,";
 #endif
 
-
 #ifndef NOREAD
         begin = timestamp();
 
@@ -208,8 +206,7 @@ int create_files(int argc, char* argv[])
             for (j = 0; j < chunks; j++)
                 reqs[i * chunks + j] = disks[i]->aread(buffer + buffer_size_int * i + j * chunk_size_int,
                                                        offset + j * current_chunk_size,
-                                                       current_chunk_size,
-                                                       stxxl::default_completion_handler());
+                                                       current_chunk_size);
         }
 
  #ifdef WATCH_TIMES
diff --git a/tools/extras/CMakeLists.txt b/tools/extras/CMakeLists.txt
index 10293c2..07cbb44 100644
--- a/tools/extras/CMakeLists.txt
+++ b/tools/extras/CMakeLists.txt
@@ -10,6 +10,6 @@
 #  http://www.boost.org/LICENSE_1_0.txt)
 ############################################################################
 
-stxxl_build_extra_tool(benchmark_disk_and_flash)
-stxxl_build_extra_tool(iobench_scatter_in_place)
-stxxl_build_extra_tool(pq_param)
+stxxl_build_test(benchmark_disk_and_flash)
+stxxl_build_test(iobench_scatter_in_place)
+stxxl_build_test(pq_param)
diff --git a/tools/extras/benchmark_disk_and_flash.cpp b/tools/extras/benchmark_disk_and_flash.cpp
index 18b7fd0..bd460bc 100644
--- a/tools/extras/benchmark_disk_and_flash.cpp
+++ b/tools/extras/benchmark_disk_and_flash.cpp
@@ -20,7 +20,6 @@ using stxxl::request_ptr;
 using stxxl::file;
 using stxxl::timestamp;
 
-
 #ifdef BLOCK_ALIGN
  #undef BLOCK_ALIGN
 #endif
@@ -65,12 +64,11 @@ void run(char* buffer, file** disks, stxxl::int64 offset, stxxl::int64 length,
         for (i = 0; i < 2; i++)
         {
             for (j = 0; j < info[i].n; j++) {
-                stxxl::int64 bytes = info[i].bytes;
+                unsigned bytes = info[i].bytes;
                 stxxl::int64 position = (bytes * (rand() & 0xffff)) % length;
-                reqs[r++] = disks[info[i].id]->aread(buf, offset + position, bytes,
-                                                     stxxl::default_completion_handler());
+                reqs[r++] = disks[info[i].id]->aread(buf, offset + position, bytes);
                 buf += bytes;
-                volume += bytes;
+                volume += (double)bytes;
             }
         }
 
@@ -115,7 +113,7 @@ int main(int argc, char* argv[])
     }
 
     const size_t ndisks = disks_arr.size();
-    stxxl::int64 buffer_size = 1024 * MB;
+    stxxl::unsigned_type buffer_size = 1024 * MB;
     const stxxl::int64 buffer_size_int = buffer_size / sizeof(int);
 
     unsigned i;
diff --git a/tools/extras/iobench_scatter_in_place.cpp b/tools/extras/iobench_scatter_in_place.cpp
index 6b366ab..161a809 100644
--- a/tools/extras/iobench_scatter_in_place.cpp
+++ b/tools/extras/iobench_scatter_in_place.cpp
@@ -18,18 +18,16 @@
 #include <stxxl/aligned_alloc>
 #include <stxxl/timer>
 
-
 using stxxl::request_ptr;
 using stxxl::file;
 using stxxl::timer;
 using stxxl::uint64;
-
+using stxxl::unsigned_type;
 
 #ifndef BLOCK_ALIGN
  #define BLOCK_ALIGN  4096
 #endif
 
-
 #define MB (1024 * 1024)
 #define GB (1024 * 1024 * 1024)
 
@@ -49,7 +47,7 @@ inline double throughput(stxxl::uint64 bytes, double seconds)
 {
     if (seconds == 0.0)
         return 0.0;
-    return bytes / (1024 * 1024) / seconds;
+    return (double)bytes / (1024 * 1024) / seconds;
 }
 
 int main(int argc, char* argv[])
@@ -57,12 +55,12 @@ int main(int argc, char* argv[])
     if (argc < 5)
         usage(argv[0]);
 
-    uint64 num_blocks = stxxl::atoint64(argv[1]);
-    uint64 blocks_per_round = stxxl::atoint64(argv[2]);
-    uint64 block_size = stxxl::atoint64(argv[3]);
+    unsigned_type num_blocks = (unsigned_type)stxxl::atouint64(argv[1]);
+    unsigned_type blocks_per_round = (unsigned_type)stxxl::atouint64(argv[2]);
+    unsigned_type block_size = (unsigned_type)stxxl::atouint64(argv[3]);
     const char* filebase = argv[4];
 
-    uint64 num_rounds = stxxl::div_ceil(num_blocks, blocks_per_round);
+    unsigned_type num_rounds = stxxl::div_ceil(num_blocks, blocks_per_round);
 
     std::cout << "# Splitting '" << filebase << "' into "
               << num_rounds * blocks_per_round << " blocks of size "
@@ -91,7 +89,7 @@ int main(int argc, char* argv[])
                 timer t_op(true);
                 // read a block
                 {
-                    input_file.aread(buffer + i * block_size, offset, block_size, stxxl::default_completion_handler())->wait();
+                    input_file.aread(buffer + i * block_size, offset, block_size)->wait();
                 }
                 t_op.stop();
                 totalsizeread += block_size;
@@ -121,7 +119,7 @@ int main(int argc, char* argv[])
                     char cfn[4096]; // PATH_MAX
                     snprintf(cfn, sizeof(cfn), "%s_%012llX", filebase, offset);
                     file_type chunk_file(cfn, file::CREAT | file::RDWR | file::DIRECT, 0);
-                    chunk_file.awrite(buffer + i * block_size, 0, block_size, stxxl::default_completion_handler())->wait();
+                    chunk_file.awrite(buffer + i * block_size, 0, block_size)->wait();
                 }
                 t_op.stop();
                 totalsizewrite += block_size;
diff --git a/tools/mallinfo.cpp b/tools/mallinfo.cpp
index b09d1b8..b28f555 100644
--- a/tools/mallinfo.cpp
+++ b/tools/mallinfo.cpp
@@ -51,7 +51,7 @@ int do_mallinfo(int argc, char* argv[])
                        "Needs to run as root to block more than 64 KiB in default settings."
                        );
 
-    stxxl::uint64 M;
+    stxxl::internal_size_type M;
     cp.add_param_bytes("size", "Amount of memory to allocate (e.g. 1GiB)", M);
 
     if (!cp.process(argc, argv))
diff --git a/tools/mlock.cpp b/tools/mlock.cpp
index 5eb0dd3..33486d1 100644
--- a/tools/mlock.cpp
+++ b/tools/mlock.cpp
@@ -38,7 +38,7 @@ int do_mlock(int argc, char* argv[])
                        );
     cp.set_author("Andreas Beckmann <beckmann at cs.uni-frankfurt.de>");
 
-    stxxl::uint64 M;
+    stxxl::unsigned_type M;
     cp.add_param_bytes("size", "Amount of memory to allocate (e.g. 4GiB)", M);
 
     if (!cp.process(argc, argv))
diff --git a/tools/stxxl_tool.cpp b/tools/stxxl_tool.cpp
index b1872c6..d8115d3 100644
--- a/tools/stxxl_tool.cpp
+++ b/tools/stxxl_tool.cpp
@@ -36,8 +36,8 @@ int stxxl_info(int, char**)
     STXXL_MSG("sizeof(off_t)          = " << sizeof(off_t));
     STXXL_MSG("sizeof(void*)          = " << sizeof(void*));
 
-#if defined(STXXL_HAVE_AIO_FILE)
-    STXXL_MSG("STXXL_HAVE_AIO_FILE    = " << STXXL_HAVE_AIO_FILE);
+#if defined(STXXL_HAVE_LINUXAIO_FILE)
+    STXXL_MSG("STXXL_HAVE_LINUXAIO_FILE = " << STXXL_HAVE_LINUXAIO_FILE);
 #endif
 
     return 0;
@@ -109,12 +109,12 @@ int main_usage(const char* arg0)
     std::cout << "Usage: " << arg0 << " <subtool> ..." << std::endl
               << "Available subtools: " << std::endl;
 
-    size_t shortlen = 0;
+    int shortlen = 0;
 
     for (unsigned int i = 0; subtools[i].name; ++i)
     {
         if (!subtools[i].shortline) continue;
-        shortlen = std::max(shortlen, strlen(subtools[i].name));
+        shortlen = std::max(shortlen, (int)strlen(subtools[i].name));
     }
 
     for (unsigned int i = 0; subtools[i].name; ++i)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/libstxxl1.git



More information about the debian-science-commits mailing list